file2md 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +293 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +153 -0
- package/dist/index.js.map +1 -0
- package/dist/parsers/docx-parser.d.ts +20 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +237 -0
- package/dist/parsers/docx-parser.js.map +1 -0
- package/dist/parsers/pdf-parser.d.ts +8 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +98 -0
- package/dist/parsers/pdf-parser.js.map +1 -0
- package/dist/parsers/pptx-parser.d.ts +21 -0
- package/dist/parsers/pptx-parser.d.ts.map +1 -0
- package/dist/parsers/pptx-parser.js +264 -0
- package/dist/parsers/pptx-parser.js.map +1 -0
- package/dist/parsers/xlsx-parser.d.ts +19 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +267 -0
- package/dist/parsers/xlsx-parser.js.map +1 -0
- package/dist/types/errors.d.ts +52 -0
- package/dist/types/errors.d.ts.map +1 -0
- package/dist/types/errors.js +76 -0
- package/dist/types/errors.js.map +1 -0
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/interfaces.d.ts +228 -0
- package/dist/types/interfaces.d.ts.map +1 -0
- package/dist/types/interfaces.js +10 -0
- package/dist/types/interfaces.js.map +1 -0
- package/dist/utils/chart-extractor.d.ts +44 -0
- package/dist/utils/chart-extractor.d.ts.map +1 -0
- package/dist/utils/chart-extractor.js +258 -0
- package/dist/utils/chart-extractor.js.map +1 -0
- package/dist/utils/image-extractor.d.ts +50 -0
- package/dist/utils/image-extractor.d.ts.map +1 -0
- package/dist/utils/image-extractor.js +136 -0
- package/dist/utils/image-extractor.js.map +1 -0
- package/dist/utils/layout-parser.d.ts +55 -0
- package/dist/utils/layout-parser.d.ts.map +1 -0
- package/dist/utils/layout-parser.js +244 -0
- package/dist/utils/layout-parser.js.map +1 -0
- package/dist/utils/pdf-extractor.d.ts +46 -0
- package/dist/utils/pdf-extractor.d.ts.map +1 -0
- package/dist/utils/pdf-extractor.js +235 -0
- package/dist/utils/pdf-extractor.js.map +1 -0
- package/package.json +70 -0
@@ -0,0 +1,228 @@
|
|
1
|
+
import type { Buffer } from 'node:buffer';
|
2
|
+
/**
|
3
|
+
* Options for document conversion
|
4
|
+
*/
|
5
|
+
export interface ConvertOptions {
|
6
|
+
/** Directory to save extracted images. Defaults to 'images' */
|
7
|
+
readonly imageDir?: string;
|
8
|
+
/** Whether to preserve document layout as much as possible. Defaults to true */
|
9
|
+
readonly preserveLayout?: boolean;
|
10
|
+
/** Whether to extract charts and convert them to markdown tables. Defaults to true */
|
11
|
+
readonly extractCharts?: boolean;
|
12
|
+
/** Whether to extract images from documents. Defaults to true */
|
13
|
+
readonly extractImages?: boolean;
|
14
|
+
/** Maximum number of pages to process for PDFs. Defaults to unlimited */
|
15
|
+
readonly maxPages?: number;
|
16
|
+
}
|
17
|
+
/**
|
18
|
+
* Metadata about the converted document
|
19
|
+
*/
|
20
|
+
export interface DocumentMetadata {
|
21
|
+
/** Original file type detected */
|
22
|
+
readonly fileType: string;
|
23
|
+
/** MIME type of the original file */
|
24
|
+
readonly mimeType: string;
|
25
|
+
/** Number of pages/sheets/slides processed */
|
26
|
+
readonly pageCount: number;
|
27
|
+
/** Number of images extracted */
|
28
|
+
readonly imageCount: number;
|
29
|
+
/** Number of charts extracted */
|
30
|
+
readonly chartCount: number;
|
31
|
+
/** Processing time in milliseconds */
|
32
|
+
readonly processingTime: number;
|
33
|
+
/** Additional format-specific metadata */
|
34
|
+
readonly additional?: Record<string, unknown>;
|
35
|
+
}
|
36
|
+
/**
|
37
|
+
* Information about an extracted image
|
38
|
+
*/
|
39
|
+
export interface ImageData {
|
40
|
+
/** Original path/reference in the document */
|
41
|
+
readonly originalPath: string;
|
42
|
+
/** Path where the image was saved */
|
43
|
+
readonly savedPath: string;
|
44
|
+
/** Base path for relative references */
|
45
|
+
readonly basePath?: string;
|
46
|
+
/** Image dimensions if available */
|
47
|
+
readonly dimensions?: {
|
48
|
+
readonly width: number;
|
49
|
+
readonly height: number;
|
50
|
+
};
|
51
|
+
/** Image format (png, jpg, etc.) */
|
52
|
+
readonly format?: string;
|
53
|
+
/** Size of the image file in bytes */
|
54
|
+
readonly size?: number;
|
55
|
+
}
|
56
|
+
/**
|
57
|
+
* Information about an extracted chart
|
58
|
+
*/
|
59
|
+
export interface ChartData {
|
60
|
+
/** Type of chart (bar, line, pie, scatter, etc.) */
|
61
|
+
readonly type: ChartType;
|
62
|
+
/** Chart title */
|
63
|
+
readonly title: string;
|
64
|
+
/** Chart data series */
|
65
|
+
readonly series: readonly ChartSeries[];
|
66
|
+
/** Category labels */
|
67
|
+
readonly categories: readonly string[];
|
68
|
+
/** Additional chart metadata */
|
69
|
+
readonly metadata?: Record<string, unknown>;
|
70
|
+
}
|
71
|
+
/**
|
72
|
+
* Supported chart types
|
73
|
+
*/
|
74
|
+
export type ChartType = 'bar' | 'line' | 'pie' | 'scatter' | 'area' | 'column' | 'unknown';
|
75
|
+
/**
|
76
|
+
* Data series in a chart
|
77
|
+
*/
|
78
|
+
export interface ChartSeries {
|
79
|
+
/** Series name */
|
80
|
+
readonly name: string;
|
81
|
+
/** Data values */
|
82
|
+
readonly values: readonly number[];
|
83
|
+
/** Categories for this series (if different from chart categories) */
|
84
|
+
readonly categories?: readonly string[];
|
85
|
+
}
|
86
|
+
/**
|
87
|
+
* Cell data in a table
|
88
|
+
*/
|
89
|
+
export interface CellData {
|
90
|
+
/** Cell text content */
|
91
|
+
text: string;
|
92
|
+
/** Whether the cell text is bold */
|
93
|
+
bold: boolean;
|
94
|
+
/** Whether the cell text is italic */
|
95
|
+
italic: boolean;
|
96
|
+
/** Text alignment */
|
97
|
+
alignment: TextAlignment;
|
98
|
+
/** Background color (hex code) */
|
99
|
+
backgroundColor?: string;
|
100
|
+
/** Number of columns this cell spans */
|
101
|
+
colSpan: number;
|
102
|
+
/** Number of rows this cell spans */
|
103
|
+
rowSpan: number;
|
104
|
+
/** Whether this cell is part of a merged cell */
|
105
|
+
merged?: boolean;
|
106
|
+
}
|
107
|
+
/**
|
108
|
+
* Row data in a table
|
109
|
+
*/
|
110
|
+
export interface RowData {
|
111
|
+
/** Cells in this row */
|
112
|
+
cells: CellData[];
|
113
|
+
/** Row height if available */
|
114
|
+
height?: number;
|
115
|
+
}
|
116
|
+
/**
|
117
|
+
* Table structure
|
118
|
+
*/
|
119
|
+
export interface TableData {
|
120
|
+
/** All rows in the table */
|
121
|
+
rows: RowData[];
|
122
|
+
/** Table caption/title */
|
123
|
+
caption?: string;
|
124
|
+
/** Table width if available */
|
125
|
+
width?: number;
|
126
|
+
}
|
127
|
+
/**
|
128
|
+
* Text alignment options
|
129
|
+
*/
|
130
|
+
export type TextAlignment = 'left' | 'center' | 'right' | 'justify';
|
131
|
+
/**
|
132
|
+
* Position information for layout elements
|
133
|
+
*/
|
134
|
+
export interface Position {
|
135
|
+
/** X coordinate */
|
136
|
+
x: number;
|
137
|
+
/** Y coordinate */
|
138
|
+
y: number;
|
139
|
+
/** Width of the element */
|
140
|
+
width?: number;
|
141
|
+
/** Height of the element */
|
142
|
+
height?: number;
|
143
|
+
}
|
144
|
+
/**
|
145
|
+
* Layout element types
|
146
|
+
*/
|
147
|
+
export type ElementType = 'text' | 'image' | 'table' | 'chart' | 'shape' | 'unknown';
|
148
|
+
/**
|
149
|
+
* Layout element with positioning
|
150
|
+
*/
|
151
|
+
export interface LayoutElement {
|
152
|
+
/** Type of element */
|
153
|
+
readonly type: ElementType;
|
154
|
+
/** Element content */
|
155
|
+
readonly content: string | TableData | ImageData | ChartData;
|
156
|
+
/** Position of the element */
|
157
|
+
readonly position?: Position;
|
158
|
+
/** Additional formatting information */
|
159
|
+
readonly formatting?: Record<string, unknown>;
|
160
|
+
}
|
161
|
+
/**
|
162
|
+
* List item data
|
163
|
+
*/
|
164
|
+
export interface ListItem {
|
165
|
+
/** List item text */
|
166
|
+
readonly text: string;
|
167
|
+
/** Nesting level (0-based) */
|
168
|
+
readonly level: number;
|
169
|
+
/** Whether the text is bold */
|
170
|
+
readonly bold?: boolean;
|
171
|
+
/** Whether the text is italic */
|
172
|
+
readonly italic?: boolean;
|
173
|
+
/** Child list items */
|
174
|
+
readonly children?: readonly ListItem[];
|
175
|
+
}
|
176
|
+
/**
|
177
|
+
* List data structure
|
178
|
+
*/
|
179
|
+
export interface ListData {
|
180
|
+
/** Whether this is an ordered (numbered) list */
|
181
|
+
readonly isOrdered: boolean;
|
182
|
+
/** List items */
|
183
|
+
readonly items: readonly ListItem[];
|
184
|
+
}
|
185
|
+
/**
|
186
|
+
* Page information for PDFs
|
187
|
+
*/
|
188
|
+
export interface PageData {
|
189
|
+
/** Page number (1-based) */
|
190
|
+
readonly pageNumber: number;
|
191
|
+
/** Path to the page image */
|
192
|
+
readonly imagePath: string;
|
193
|
+
/** Full path to the image file */
|
194
|
+
readonly fullPath: string;
|
195
|
+
/** Page dimensions */
|
196
|
+
readonly dimensions?: {
|
197
|
+
readonly width: number;
|
198
|
+
readonly height: number;
|
199
|
+
};
|
200
|
+
}
|
201
|
+
/**
|
202
|
+
* Result of document conversion
|
203
|
+
*/
|
204
|
+
export interface ConversionResult {
|
205
|
+
/** Generated markdown content */
|
206
|
+
readonly markdown: string;
|
207
|
+
/** Extracted images */
|
208
|
+
readonly images: readonly ImageData[];
|
209
|
+
/** Extracted charts */
|
210
|
+
readonly charts: readonly ChartData[];
|
211
|
+
/** Document metadata */
|
212
|
+
readonly metadata: DocumentMetadata;
|
213
|
+
}
|
214
|
+
/**
|
215
|
+
* Input type for conversion function
|
216
|
+
*/
|
217
|
+
export type ConvertInput = string | Buffer;
|
218
|
+
/**
|
219
|
+
* Supported MIME types
|
220
|
+
*/
|
221
|
+
export declare const SUPPORTED_MIME_TYPES: {
|
222
|
+
readonly PDF: "application/pdf";
|
223
|
+
readonly DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
|
224
|
+
readonly XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
|
225
|
+
readonly PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation";
|
226
|
+
};
|
227
|
+
export type SupportedMimeType = typeof SUPPORTED_MIME_TYPES[keyof typeof SUPPORTED_MIME_TYPES];
|
228
|
+
//# sourceMappingURL=interfaces.d.ts.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"interfaces.d.ts","sourceRoot":"","sources":["../../src/types/interfaces.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,+DAA+D;IAC/D,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,gFAAgF;IAChF,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;IAClC,sFAAsF;IACtF,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,iEAAiE;IACjE,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,yEAAyE;IACzE,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,qCAAqC;IACrC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,8CAA8C;IAC9C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,iCAAiC;IACjC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,iCAAiC;IACjC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,sCAAsC;IACtC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,0CAA0C;IAC1C,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC/C;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,8CAA8C;IAC9C,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,qCAAqC;IACrC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wCAAwC;IACxC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,oCAAoC;IACpC,QAAQ,CAAC,UAAU,CAAC,EAAE;QACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,oCAAoC;IACpC,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,sCAAsC;IACtC,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,oDAAoD;IACpD,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,kBAAkB;IAClB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,wBAAwB;IACxB,QAAQ,CAAC,MAAM,EAAE,SAAS,WAAW,EAAE,CAAC;IACxC,sBAAsB;IACtB,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IACvC,gCAAgC;IAChC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,SAAS,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;AAE3F;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,kBAAkB;IAClB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,kBAAkB;IAClB,QAAQ,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,sEAAsE;IACtE,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,wBAAwB
;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,oCAAoC;IACpC,IAAI,EAAE,OAAO,CAAC;IACd,sCAAsC;IACtC,MAAM,EAAE,OAAO,CAAC;IAChB,qBAAqB;IACrB,SAAS,EAAE,aAAa,CAAC;IACzB,kCAAkC;IAClC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,wCAAwC;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,OAAO,EAAE,MAAM,CAAC;IAChB,iDAAiD;IACjD,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB,wBAAwB;IACxB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,8BAA8B;IAC9B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,4BAA4B;IAC5B,IAAI,EAAE,OAAO,EAAE,CAAC;IAChB,0BAA0B;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,SAAS,CAAC;AAEpE;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,mBAAmB;IACnB,CAAC,EAAE,MAAM,CAAC;IACV,mBAAmB;IACnB,CAAC,EAAE,MAAM,CAAC;IACV,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,CAAC;AAErF;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,sBAAsB;IACtB,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAC3B,sBAAsB;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;IAC7D,8BAA8B;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,CAAC;IAC7B,wCAAwC;IACxC,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC/C;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,qBAAqB;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,8BAA8B;IAC9B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,+BAA+B;IAC/B,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,CAAC;IACxB,iCAAiC;IACjC,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC;IAC1B,uBAAuB;IACvB,QAAQ,CAAC,QAAQ,CAAC,EAAE,SAAS,QAAQ,EAAE,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,iDAAiD;IACjD,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;IAC5B,iBAAiB;IACjB,QAAQ,CAAC,KAAK,EAAE,SAAS,QAAQ,EAAE,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,4BAA4B;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,6BAA6B;IAC7B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,kCAAkC;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,sBAAsB;IACtB,QAAQ,CAAC,UAAU,CAAC,EAAE;QACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QACvB,QAAQ
,CAAC,MAAM,EAAE,MAAM,CAAC;KACzB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,iCAAiC;IACjC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,uBAAuB;IACvB,QAAQ,CAAC,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IACtC,uBAAuB;IACvB,QAAQ,CAAC,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IACtC,wBAAwB;IACxB,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3C;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;CAKvB,CAAC;AAEX,MAAM,MAAM,iBAAiB,GAAG,OAAO,oBAAoB,CAAC,MAAM,OAAO,oBAAoB,CAAC,CAAC"}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
/**
|
2
|
+
* Supported MIME types: maps each short format key (PDF, DOCX, XLSX, PPTX) to its MIME type string.
|
3
|
+
*/
|
4
|
+
export const SUPPORTED_MIME_TYPES = {
|
5
|
+
PDF: 'application/pdf',
|
6
|
+
DOCX: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
7
|
+
XLSX: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
8
|
+
PPTX: 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
9
|
+
};
|
10
|
+
//# sourceMappingURL=interfaces.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"interfaces.js","sourceRoot":"","sources":["../../src/types/interfaces.ts"],"names":[],"mappings":"AA4OA;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,GAAG,EAAE,iBAAiB;IACtB,IAAI,EAAE,yEAAyE;IAC/E,IAAI,EAAE,mEAAmE;IACzE,IAAI,EAAE,2EAA2E;CACzE,CAAC"}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
import type JSZip from 'jszip';
|
2
|
+
import type { ChartData } from '../types/interfaces.js';
|
3
|
+
import type { ImageExtractor } from './image-extractor.js';
|
4
|
+
/**
 * One chart returned by `ChartExtractor.extractChartsFromZip`.
 *
 * - `originalPath`: path of the chart XML entry inside the ZIP archive.
 * - `data`: the chart contents parsed from that entry.
 * - `basePath`: the `basePath` argument that was passed to `extractChartsFromZip`.
 */
interface ExtractedChart {
|
5
|
+
readonly originalPath: string;
|
6
|
+
readonly data: ChartData;
|
7
|
+
readonly basePath: string;
|
8
|
+
}
|
9
|
+
export declare class ChartExtractor {
|
10
|
+
private readonly imageExtractor;
|
11
|
+
private chartCounter;
|
12
|
+
constructor(imageExtractor: ImageExtractor);
|
13
|
+
/**
|
14
|
+
* Extract charts from a ZIP archive (DOCX, XLSX, PPTX)
|
15
|
+
*/
|
16
|
+
extractChartsFromZip(zip: JSZip, basePath?: string): Promise<readonly ExtractedChart[]>;
|
17
|
+
/**
|
18
|
+
* Parse a chart XML file
|
19
|
+
*/
|
20
|
+
private parseChart;
|
21
|
+
private extractTextFromTitle;
|
22
|
+
private extractBarChartData;
|
23
|
+
private extractLineChartData;
|
24
|
+
private extractPieChartData;
|
25
|
+
private extractScatterChartData;
|
26
|
+
private extractGenericChartData;
|
27
|
+
/**
|
28
|
+
* Format chart data as markdown
|
29
|
+
*/
|
30
|
+
formatChartAsMarkdown(chartData: ChartData): string;
|
31
|
+
private formatBarLineChart;
|
32
|
+
private formatPieChart;
|
33
|
+
private formatGenericChart;
|
34
|
+
/**
|
35
|
+
* Reset internal counters
|
36
|
+
*/
|
37
|
+
reset(): void;
|
38
|
+
/**
|
39
|
+
* Get current chart counter
|
40
|
+
*/
|
41
|
+
get currentChartCount(): number;
|
42
|
+
}
|
43
|
+
export {};
|
44
|
+
//# sourceMappingURL=chart-extractor.d.ts.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"chart-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/chart-extractor.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAE/B,OAAO,KAAK,EAAE,SAAS,EAA0B,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAE3D,UAAU,cAAc;IACtB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAyDD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAChD,OAAO,CAAC,YAAY,CAAa;gBAErB,cAAc,EAAE,cAAc;IAI1C;;OAEG;IACG,oBAAoB,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,GAAE,MAAW,GAAG,OAAO,CAAC,SAAS,cAAc,EAAE,CAAC;IAiCjG;;OAEG;YACW,UAAU;IA8DxB,OAAO,CAAC,oBAAoB;IAoB5B,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,oBAAoB;IAI5B,OAAO,CAAC,mBAAmB;IAI3B,OAAO,CAAC,uBAAuB;IAI/B,OAAO,CAAC,uBAAuB;IA+C/B;;OAEG;IACH,qBAAqB,CAAC,SAAS,EAAE,SAAS,GAAG,MAAM;IAuBnD,OAAO,CAAC,kBAAkB;IAqC1B,OAAO,CAAC,cAAc;IAqBtB,OAAO,CAAC,kBAAkB;IAgB1B;;OAEG;IACH,KAAK,IAAI,IAAI;IAIb;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAE9B;CACF"}
|
@@ -0,0 +1,258 @@
|
|
1
|
+
import { parseStringPromise } from 'xml2js';
|
2
|
+
import { ChartExtractionError } from '../types/errors.js';
|
3
|
+
/**
 * Return the text carried by a parsed XML node.
 *
 * xml2js represents a text-only element as a plain string and an element that
 * also has attributes as an object whose text sits under `_`. Anything else
 * (missing node, element without text) yields an empty string.
 *
 * @param {unknown} node - A value taken from the parsed XML tree.
 * @returns {string} The node text, or '' when there is none.
 */
function nodeText(node) {
    if (typeof node === 'string') {
        return node;
    }
    if (node && typeof node === 'object' && typeof node._ === 'string') {
        return node._;
    }
    return '';
}
/**
 * Collect the `c:pt` entries of a chart data source element.
 *
 * Looks, in order, at a cached string reference, a cached number reference,
 * and string / number literals.
 *
 * NOTE(review): points are returned in document order; their `idx` attributes
 * are not consulted, so a sparse cache may not line up with its categories.
 *
 * @param {any} dataSource - Parsed `c:tx`, `c:cat`, `c:val`, `c:xVal` or `c:yVal` element.
 * @returns {any[]} The point nodes, or an empty array when none are present.
 */
function cachedPoints(dataSource) {
    if (!dataSource || typeof dataSource !== 'object') {
        return [];
    }
    const cache = dataSource['c:strRef']?.[0]?.['c:strCache']?.[0]
        ?? dataSource['c:numRef']?.[0]?.['c:numCache']?.[0]
        ?? dataSource['c:strLit']?.[0]
        ?? dataSource['c:numLit']?.[0];
    const points = cache?.['c:pt'];
    return Array.isArray(points) ? points : [];
}
/**
 * Make a value safe to place inside a markdown table cell.
 *
 * Line breaks would end the table row and an unescaped `|` would start a new
 * column, so line breaks become spaces and pipes are backslash-escaped.
 *
 * @param {unknown} value - Cell content.
 * @returns {string} The escaped cell text.
 */
function escapeTableCell(value) {
    return String(value).replace(/\r\n|\r|\n/g, ' ').replace(/\|/g, '\\|');
}
/**
 * Plot-area elements that are understood, in lookup priority order. Each entry
 * is `[element name, chart type, reader]`; the first element present in the
 * plot area decides the chart type.
 */
const PLOT_ELEMENTS = [
    ['c:barChart', 'bar', (extractor, node) => extractor.extractBarChartData(node)],
    ['c:lineChart', 'line', (extractor, node) => extractor.extractLineChartData(node)],
    ['c:pieChart', 'pie', (extractor, node) => extractor.extractPieChartData(node)],
    ['c:scatterChart', 'scatter', (extractor, node) => extractor.extractScatterChartData(node)],
    ['c:bar3DChart', 'bar', (extractor, node) => extractor.extractBarChartData(node)],
    ['c:line3DChart', 'line', (extractor, node) => extractor.extractLineChartData(node)],
    ['c:pie3DChart', 'pie', (extractor, node) => extractor.extractPieChartData(node)],
    ['c:areaChart', 'area', (extractor, node) => extractor.extractGenericChartData(node)],
    ['c:area3DChart', 'area', (extractor, node) => extractor.extractGenericChartData(node)]
];
/**
 * Extracts chart definitions from Office Open XML archives and renders them
 * as markdown.
 *
 * The instance keeps a running chart counter that numbers the headings
 * produced by `formatChartAsMarkdown`.
 */
export class ChartExtractor {
    imageExtractor;
    chartCounter = 0;
    /**
     * @param imageExtractor - Image extractor instance; stored on the instance.
     */
    constructor(imageExtractor) {
        this.imageExtractor = imageExtractor;
    }
    /**
     * Extract charts from a ZIP archive (DOCX, XLSX, PPTX).
     *
     * Every entry whose path contains `/charts/` and ends in `.xml` is parsed.
     * Entries that are not chart documents are skipped; entries that fail to
     * parse are reported with `console.warn` and skipped.
     *
     * @param zip - Archive to scan (must provide `forEach`).
     * @param basePath - Value copied into each result's `basePath`.
     * @returns The extracted charts, in archive iteration order.
     */
    async extractChartsFromZip(zip, basePath = '') {
        const candidates = [];
        zip.forEach((relativePath, file) => {
            // Look for chart files
            if (relativePath.includes('/charts/') && relativePath.endsWith('.xml')) {
                candidates.push({ path: relativePath, file, basePath });
            }
        });
        const extractedCharts = [];
        for (const candidate of candidates) {
            try {
                const chartData = await this.parseChart(candidate.file);
                if (chartData) {
                    extractedCharts.push({
                        originalPath: candidate.path,
                        data: chartData,
                        basePath: candidate.basePath
                    });
                }
            }
            catch (error) {
                console.warn(`Failed to extract chart ${candidate.path}:`, error instanceof Error ? error.message : 'Unknown error');
            }
        }
        return extractedCharts;
    }
    /**
     * Parse a chart XML file.
     *
     * @param chartFile - Archive entry exposing `async('string')`.
     * @returns The chart data, or `null` when the document has no
     *   `c:chartSpace` root (for example the style and colour parts stored
     *   next to charts).
     * @throws {ChartExtractionError} When reading or parsing the entry fails.
     */
    async parseChart(chartFile) {
        try {
            const xmlContent = await chartFile.async('string');
            const result = await parseStringPromise(xmlContent);
            const root = result?.['c:chartSpace'];
            if (root === undefined || root === null) {
                return null;
            }
            // xml2js (default options) wraps child elements in arrays but not the
            // root element, so accept both shapes instead of assuming an array.
            const chartSpace = Array.isArray(root) ? root[0] : root;
            const chartData = {
                type: 'unknown',
                title: '',
                series: [],
                categories: []
            };
            const chart = chartSpace?.['c:chart']?.[0];
            if (!chart) {
                return chartData;
            }
            // Extract title
            const titleText = chart['c:title']?.[0]?.['c:tx']?.[0];
            if (titleText) {
                chartData.title = this.extractTextFromTitle(titleText);
            }
            // Determine chart type and extract data from the plot area
            const plotArea = chart['c:plotArea']?.[0];
            if (plotArea) {
                const match = PLOT_ELEMENTS.find(([element]) => plotArea[element]);
                if (match) {
                    const [element, type, read] = match;
                    const { series, categories } = read(this, plotArea[element][0]);
                    chartData.type = type;
                    chartData.series = series;
                    chartData.categories = categories;
                }
            }
            return chartData;
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            throw new ChartExtractionError(`Failed to parse chart: ${message}`, error);
        }
    }
    /**
     * Build the title string from a parsed `c:tx` element.
     *
     * Rich-text titles concatenate all runs of a paragraph and join paragraphs
     * with a single space; otherwise the first cached point of a string
     * reference is used.
     *
     * @param titleData - Parsed `c:tx` element of the chart title.
     * @returns The title text, or '' when none can be found.
     */
    extractTextFromTitle(titleData) {
        const paragraphs = titleData?.['c:rich']?.[0]?.['a:p'];
        if (Array.isArray(paragraphs)) {
            return paragraphs
                .map((paragraph) => {
                    const runs = Array.isArray(paragraph?.['a:r']) ? paragraph['a:r'] : [];
                    return runs.map((run) => nodeText(run?.['a:t']?.[0])).join('');
                })
                .filter((text) => text.length > 0)
                .join(' ')
                .trim();
        }
        const cachedTitle = cachedPoints(titleData)[0];
        return cachedTitle ? nodeText(cachedTitle['c:v']?.[0]).trim() : '';
    }
    /** Read series and categories of a bar chart element. */
    extractBarChartData(barChart) {
        return this.extractGenericChartData(barChart);
    }
    /** Read series and categories of a line chart element. */
    extractLineChartData(lineChart) {
        return this.extractGenericChartData(lineChart);
    }
    /** Read series and categories of a pie chart element. */
    extractPieChartData(pieChart) {
        return this.extractGenericChartData(pieChart);
    }
    /** Read series and categories of a scatter chart element. */
    extractScatterChartData(scatterChart) {
        return this.extractGenericChartData(scatterChart);
    }
    /**
     * Read every `c:ser` child of a chart-type element.
     *
     * Values come from `c:val` (or `c:yVal`), category labels from `c:cat`
     * (or `c:xVal`). Values that do not parse as numbers become 0.
     *
     * @param chartData - Parsed chart-type element (e.g. `c:barChart`).
     * @returns `series` in document order and `categories`, which is the
     *   label list of the first series that has one.
     */
    extractGenericChartData(chartData) {
        const series = [];
        let allCategories = [];
        const seriesNodes = chartData?.['c:ser'];
        if (!Array.isArray(seriesNodes)) {
            return { series, categories: allCategories };
        }
        for (const seriesData of seriesNodes) {
            const seriesInfo = {
                name: '',
                values: [],
                categories: undefined
            };
            // Series name: cached reference first, literal `c:v` as fallback
            const nameSource = seriesData?.['c:tx']?.[0];
            const namePoint = cachedPoints(nameSource)[0];
            seriesInfo.name = namePoint
                ? nodeText(namePoint['c:v']?.[0])
                : nodeText(nameSource?.['c:v']?.[0]);
            // Values
            const valueSource = (seriesData?.['c:val'] ?? seriesData?.['c:yVal'])?.[0];
            for (const point of cachedPoints(valueSource)) {
                seriesInfo.values.push(Number.parseFloat(nodeText(point?.['c:v']?.[0])) || 0);
            }
            // Categories for this series
            const categorySource = (seriesData?.['c:cat'] ?? seriesData?.['c:xVal'])?.[0];
            const categoryPoints = cachedPoints(categorySource);
            if (categoryPoints.length > 0) {
                const categories = categoryPoints.map((point) => nodeText(point?.['c:v']?.[0]));
                seriesInfo.categories = categories;
                if (allCategories.length === 0) {
                    allCategories = categories;
                }
            }
            series.push(seriesInfo);
        }
        return { series, categories: allCategories };
    }
    /**
     * Format chart data as markdown.
     *
     * Increments the chart counter and uses it to number the heading.
     * Bar and line charts become one table with a column per series, pie
     * charts a value/percentage table, and other types a per-series listing.
     *
     * @param chartData - Chart to render.
     * @returns The markdown fragment, ending with a blank line.
     */
    formatChartAsMarkdown(chartData) {
        this.chartCounter++;
        let markdown = `#### Chart ${this.chartCounter}: ${chartData.title || chartData.type.toUpperCase() + ' Chart'}\n\n`;
        if (chartData.series.length === 0) {
            return markdown + '*No chart data available*\n\n';
        }
        switch (chartData.type) {
            case 'bar':
            case 'line':
                markdown += this.formatBarLineChart(chartData);
                break;
            case 'pie':
                markdown += this.formatPieChart(chartData);
                break;
            default:
                markdown += this.formatGenericChart(chartData);
        }
        return markdown + '\n';
    }
    /**
     * Render a bar or line chart as a table: one row per data point, one
     * column per series. Rows without a label are named `Item N`; missing
     * values are shown as 0.
     */
    formatBarLineChart(chartData) {
        const header = ['Category', ...chartData.series.map((series) => escapeTableCell(series.name || 'Series'))];
        let markdown = `| ${header.join(' | ')} |\n`;
        markdown += `| ${header.map(() => '---').join(' | ')} |\n`;
        // Count rows from the data as well as the labels, so charts without
        // cached labels still list their values.
        const rowCount = Math.max(chartData.categories.length, ...chartData.series.map((series) => Math.max(series.categories?.length ?? 0, series.values.length)));
        for (let i = 0; i < rowCount; i++) {
            const category = chartData.categories[i] || chartData.series[0]?.categories?.[i] || `Item ${i + 1}`;
            const cells = chartData.series.map((series) => series.values[i] || 0);
            markdown += `| ${[escapeTableCell(category), ...cells].join(' | ')} |\n`;
        }
        return markdown;
    }
    /**
     * Render the first series of a pie chart as a table with each value's
     * share of the total. Rows without a label are named `Item N`.
     */
    formatPieChart(chartData) {
        const series = chartData.series[0];
        if (!series)
            return '*No pie chart data*\n';
        let markdown = '| Category | Value | Percentage |\n';
        markdown += '| --- | --- | --- |\n';
        const total = series.values.reduce((sum, val) => sum + val, 0);
        const categories = series.categories || chartData.categories;
        for (let i = 0; i < series.values.length; i++) {
            const category = categories[i] || `Item ${i + 1}`;
            const value = series.values[i] || 0;
            const percentage = total > 0 ? ((value / total) * 100).toFixed(1) : '0';
            markdown += `| ${escapeTableCell(category)} | ${value} | ${percentage}% |\n`;
        }
        return markdown;
    }
    /**
     * Render any other chart type as a per-series listing of values and,
     * when present, category labels.
     */
    formatGenericChart(chartData) {
        let markdown = `*${chartData.type.toUpperCase()} chart with ${chartData.series.length} series*\n\n`;
        chartData.series.forEach((series, index) => {
            // An empty name would leave a space before the closing `**`,
            // which markdown does not treat as bold.
            const label = series.name ? `Series ${index + 1}: ${series.name}` : `Series ${index + 1}`;
            markdown += `**${label}**\n`;
            markdown += `Values: ${series.values.join(', ')}\n`;
            if (series.categories && series.categories.length > 0) {
                markdown += `Categories: ${series.categories.join(', ')}\n`;
            }
            markdown += '\n';
        });
        return markdown;
    }
    /**
     * Reset internal counters
     */
    reset() {
        this.chartCounter = 0;
    }
    /**
     * Get current chart counter
     */
    get currentChartCount() {
        return this.chartCounter;
    }
}
|
258
|
+
//# sourceMappingURL=chart-extractor.js.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"chart-extractor.js","sourceRoot":"","sources":["../../src/utils/chart-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAI5C,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAgE1D,MAAM,OAAO,cAAc;IACR,cAAc,CAAiB;IACxC,YAAY,GAAW,CAAC,CAAC;IAEjC,YAAY,cAA8B;QACxC,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,oBAAoB,CAAC,GAAU,EAAE,WAAmB,EAAE;QAC1D,MAAM,MAAM,GAAoB,EAAE,CAAC;QAEnC,GAAG,CAAC,OAAO,CAAC,CAAC,YAAY,EAAE,IAAI,EAAE,EAAE;YACjC,uBAAuB;YACvB,IAAI,YAAY,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACvE,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,YAAY;oBAClB,IAAI,EAAE,IAAI;oBACV,QAAQ,EAAE,QAAQ;iBACnB,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,eAAe,GAAqB,EAAE,CAAC;QAC7C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,IAAI,SAAS,EAAE,CAAC;oBACd,eAAe,CAAC,IAAI,CAAC;wBACnB,YAAY,EAAE,KAAK,CAAC,IAAI;wBACxB,IAAI,EAAE,SAAS;wBACf,QAAQ,EAAE,KAAK,CAAC,QAAQ;qBACzB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,2BAA2B,KAAK,CAAC,IAAI,GAAG,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;YACnH,CAAC;QACH,CAAC;QAED,OAAO,eAAe,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,SAA4B;QACnD,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACnD,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,UAAU,CAAmB,CAAC;YAEtE,MAAM,SAAS,GAKX;gBACF,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,EAAE;gBACT,MAAM,EAAE,EAAE;gBACV,UAAU,EAAE,EAAE;aACf,CAAC;YAEF,qBAAqB;YACrB,IAAI,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEtD,gBAAgB;gBAChB,IAAI,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;oBACpC,SAAS,CAAC,KAAK,GAAG,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9E,CAAC;gBAED,oBAAoB;gBACpB,IAAI,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC;oBACxB,
MAAM,QAAQ,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;oBAExC,wCAAwC;oBACxC,IAAI,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;wBAC3B,SAAS,CAAC,IAAI,GAAG,KAAK,CAAC;wBACvB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBACtG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;yBAAM,IAAI,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;wBACnC,SAAS,CAAC,IAAI,GAAG,MAAM,CAAC;wBACxB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBACxG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;yBAAM,IAAI,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;wBAClC,SAAS,CAAC,IAAI,GAAG,KAAK,CAAC;wBACvB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBACtG,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;yBAAM,IAAI,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;wBACtC,SAAS,CAAC,IAAI,GAAG,SAAS,CAAC;wBAC3B,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAoB,CAAC,CAAC;wBAC9G,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;wBAC1B,SAAS,CAAC,UAAU,GAAG,UAAU,CAAC;oBACpC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;YACzE,MAAM,IAAI,oBAAoB,CAAC,0BAA0B,OAAO,EAAE,EAAE,KAAc,CAAC,CAAC;QACtF,CAAC;IACH,CAAC;IAEO,oBAAoB,CAAC,SAAkB;QAC7C,6FAA6F;QAC7F,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,SAAgB,CAAC;YAC/B,IAAI,KAAK,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;gBACpC,MAAM,UAAU,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;gBAC7C,IAAI,SAAS,GAAG,EAAE,CAAC;gBACnB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;oBAC9B,IAAI,IAAI,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACrC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;oBAC9C,CAAC;gBACH,CAAC;gBACD,OAAO,SAAS,CAAC,IAAI,EAA
E,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;QACpC,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,mBAAmB,CAAC,QAAyB;QACnD,OAAO,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC;IAEO,oBAAoB,CAAC,SAA0B;QACrD,OAAO,IAAI,CAAC,uBAAuB,CAAC,SAAS,CAAC,CAAC;IACjD,CAAC;IAEO,mBAAmB,CAAC,QAAyB;QACnD,OAAO,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IAChD,CAAC;IAEO,uBAAuB,CAAC,YAA6B;QAC3D,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;IACpD,CAAC;IAEO,uBAAuB,CAAC,SAA0B;QACxD,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,IAAI,aAAa,GAAa,EAAE,CAAC;QAEjC,IAAI,SAAS,CAAC,OAAO,CAAC,EAAE,CAAC;YACvB,KAAK,MAAM,UAAU,IAAI,SAAS,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC5C,MAAM,UAAU,GAIZ;oBACF,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,EAAE;oBACV,UAAU,EAAE,SAAS;iBACtB,CAAC;gBAEF,sBAAsB;gBACtB,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnF,UAAU,CAAC,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC/F,CAAC;gBAED,iBAAiB;gBACjB,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC/E,KAAK,MAAM,EAAE,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;wBAChF,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;oBACxD,CAAC;gBACH,CAAC;gBAED,qCAAqC;gBACrC,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC/E,MAAM,UAAU,GAAa,EAAE,CAAC;oBAChC,KAAK,MAAM,EAAE,IAAI,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;wBAChF,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAChC,CAAC;oBACD,UAAU,C
AAC,UAAU,GAAG,UAAU,CAAC;oBACnC,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;wBAC/B,aAAa,GAAG,UAAU,CAAC;oBAC7B,CAAC;gBACH,CAAC;gBAED,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,qBAAqB,CAAC,SAAoB;QACxC,IAAI,CAAC,YAAY,EAAE,CAAC;QACpB,IAAI,QAAQ,GAAG,cAAc,IAAI,CAAC,YAAY,KAAK,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,QAAQ,MAAM,CAAC;QAEpH,IAAI,SAAS,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,OAAO,QAAQ,GAAG,+BAA+B,CAAC;QACpD,CAAC;QAED,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;YACvB,KAAK,KAAK,CAAC;YACX,KAAK,MAAM;gBACT,QAAQ,IAAI,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,CAAC;gBAC/C,MAAM;YACR,KAAK,KAAK;gBACR,QAAQ,IAAI,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;gBAC3C,MAAM;YACR;gBACE,QAAQ,IAAI,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,QAAQ,GAAG,IAAI,CAAC;IACzB,CAAC;IAEO,kBAAkB,CAAC,SAAoB;QAC7C,IAAI,QAAQ,GAAG,cAAc,CAAC;QAE9B,qBAAqB;QACrB,KAAK,MAAM,MAAM,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;YACtC,QAAQ,IAAI,IAAI,MAAM,CAAC,IAAI,IAAI,QAAQ,IAAI,CAAC;QAC9C,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,gBAAgB;QAChB,QAAQ,IAAI,SAAS,CAAC;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,QAAQ,IAAI,QAAQ,CAAC;QACvB,CAAC;QACD,QAAQ,IAAI,IAAI,CAAC;QAEjB,oCAAoC;QACpC,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAC5B,SAAS,CAAC,UAAU,CAAC,MAAM,EAC3B,GAAG,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,CACxD,CAAC;QAEF,gBAAgB;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACpG,QAAQ,IAAI,KAAK,QAAQ,IAAI,CAAC;YAE9B,KAAK,MAAM,MAAM,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC;gBACtC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACpC,QAAQ,IAAI,IAAI,KAAK,IAAI,CAAC;YAC5B,CAAC;YACD,QAAQ,IAAI,IAAI,CAAC;QACnB,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,cAAc,CAAC,SAAoB;QACzC,MAAM,MAAM,GAAG,SA
AS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM;YAAE,OAAO,uBAAuB,CAAC;QAE5C,IAAI,QAAQ,GAAG,qCAAqC,CAAC;QACrD,QAAQ,IAAI,uBAAuB,CAAC;QAEpC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,IAAI,SAAS,CAAC,UAAU,CAAC;QAE7D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3E,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACpC,MAAM,UAAU,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;YAExE,QAAQ,IAAI,KAAK,QAAQ,MAAM,KAAK,MAAM,UAAU,OAAO,CAAC;QAC9D,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,kBAAkB,CAAC,SAAoB;QAC7C,IAAI,QAAQ,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,eAAe,SAAS,CAAC,MAAM,CAAC,MAAM,cAAc,CAAC;QAEpG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YACnC,QAAQ,IAAI,YAAY,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC;YACpD,QAAQ,IAAI,WAAW,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YACpD,IAAI,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtD,QAAQ,IAAI,eAAe,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;YAC9D,CAAC;YACD,QAAQ,IAAI,IAAI,CAAC;QACnB,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;CACF"}
|
@@ -0,0 +1,50 @@
|
|
1
|
+
import type JSZip from 'jszip';
|
2
|
+
import type { Buffer } from 'node:buffer';
|
3
|
+
import type { ImageData } from '../types/interfaces.js';
|
4
|
+
export declare class ImageExtractor {
|
5
|
+
private readonly outputDir;
|
6
|
+
private imageCounter;
|
7
|
+
private readonly extractedImages;
|
8
|
+
constructor(outputDir?: string);
|
9
|
+
/**
|
10
|
+
* Extract images from a ZIP archive (DOCX, XLSX, PPTX)
|
11
|
+
*/
|
12
|
+
extractImagesFromZip(zip: JSZip, basePath?: string): Promise<readonly ImageData[]>;
|
13
|
+
/**
|
14
|
+
* Save an image buffer to disk
|
15
|
+
*/
|
16
|
+
saveImage(buffer: Buffer, originalPath: string, basePath?: string): Promise<string | null>;
|
17
|
+
/**
|
18
|
+
* Check if a file path represents an image
|
19
|
+
*/
|
20
|
+
isImageFile(filePath: string): boolean;
|
21
|
+
/**
|
22
|
+
* Get image format from file extension
|
23
|
+
*/
|
24
|
+
private getImageFormat;
|
25
|
+
/**
|
26
|
+
* Get markdown reference for an image by its original path
|
27
|
+
*/
|
28
|
+
getImageReference(originalPath: string, basePath?: string): string | null;
|
29
|
+
/**
|
30
|
+
* Create markdown image reference
|
31
|
+
*/
|
32
|
+
getImageMarkdown(description?: string, imagePath?: string): string;
|
33
|
+
/**
|
34
|
+
* Reset the image counter and clear extracted images map
|
35
|
+
*/
|
36
|
+
reset(): void;
|
37
|
+
/**
|
38
|
+
* Get the output directory for images
|
39
|
+
*/
|
40
|
+
get imageDirectory(): string;
|
41
|
+
/**
|
42
|
+
* Get the current image counter
|
43
|
+
*/
|
44
|
+
get currentImageCount(): number;
|
45
|
+
/**
|
46
|
+
* Get all extracted image mappings
|
47
|
+
*/
|
48
|
+
get extractedImageMappings(): ReadonlyMap<string, string>;
|
49
|
+
}
|
50
|
+
//# sourceMappingURL=image-extractor.d.ts.map
|
@@ -0,0 +1 @@
|
|
1
|
+
{"version":3,"file":"image-extractor.d.ts","sourceRoot":"","sources":["../../src/utils/image-extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAC/B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAGxD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,YAAY,CAAa;IACjC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA6B;gBAEjD,SAAS,GAAE,MAAiB;IASxC;;OAEG;IACG,oBAAoB,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,GAAE,MAAW,GAAG,OAAO,CAAC,SAAS,SAAS,EAAE,CAAC;IAwC5F;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,EAAE,QAAQ,GAAE,MAAW,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAoBpG;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAUtC;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,iBAAiB,CAAC,YAAY,EAAE,MAAM,EAAE,QAAQ,GAAE,MAAW,GAAG,MAAM,GAAG,IAAI;IAS7E;;OAEG;IACH,gBAAgB,CAAC,WAAW,GAAE,MAAgB,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM;IAO3E;;OAEG;IACH,KAAK,IAAI,IAAI;IAKb;;OAEG;IACH,IAAI,cAAc,IAAI,MAAM,CAE3B;IAED;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAE9B;IAED;;OAEG;IACH,IAAI,sBAAsB,IAAI,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAExD;CACF"}
|