@minded-ai/mindedjs 3.0.8-beta.12 → 3.1.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/cli/index.js +2 -9
  2. package/dist/cli/index.js.map +1 -1
  3. package/dist/cli/runCommand.d.ts +1 -1
  4. package/dist/cli/runCommand.d.ts.map +1 -1
  5. package/dist/cli/runCommand.js +31 -23
  6. package/dist/cli/runCommand.js.map +1 -1
  7. package/dist/index.d.ts +2 -1
  8. package/dist/index.d.ts.map +1 -1
  9. package/dist/index.js +6 -3
  10. package/dist/index.js.map +1 -1
  11. package/dist/internalTools/documentExtraction/documentExtraction.d.ts +112 -102
  12. package/dist/internalTools/documentExtraction/documentExtraction.d.ts.map +1 -1
  13. package/dist/internalTools/documentExtraction/documentExtraction.js +146 -705
  14. package/dist/internalTools/documentExtraction/documentExtraction.js.map +1 -1
  15. package/dist/internalTools/documentExtraction/extractStructuredData.d.ts +57 -0
  16. package/dist/internalTools/documentExtraction/extractStructuredData.d.ts.map +1 -0
  17. package/dist/internalTools/documentExtraction/extractStructuredData.js +121 -0
  18. package/dist/internalTools/documentExtraction/extractStructuredData.js.map +1 -0
  19. package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts +16 -0
  20. package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts.map +1 -0
  21. package/dist/internalTools/documentExtraction/parseDocumentLocal.js +547 -0
  22. package/dist/internalTools/documentExtraction/parseDocumentLocal.js.map +1 -0
  23. package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts +13 -0
  24. package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts.map +1 -0
  25. package/dist/internalTools/documentExtraction/parseDocumentManaged.js +150 -0
  26. package/dist/internalTools/documentExtraction/parseDocumentManaged.js.map +1 -0
  27. package/dist/nodes/addAppToolNode.d.ts.map +1 -1
  28. package/dist/nodes/addAppToolNode.js +20 -1
  29. package/dist/nodes/addAppToolNode.js.map +1 -1
  30. package/dist/toolsLibrary/classifier.d.ts +2 -2
  31. package/dist/toolsLibrary/parseDocument.d.ts +11 -10
  32. package/dist/toolsLibrary/parseDocument.d.ts.map +1 -1
  33. package/dist/toolsLibrary/parseDocument.js +33 -189
  34. package/dist/toolsLibrary/parseDocument.js.map +1 -1
  35. package/dist/toolsLibrary/withBrowserSession.d.ts.map +1 -1
  36. package/dist/toolsLibrary/withBrowserSession.js +70 -2
  37. package/dist/toolsLibrary/withBrowserSession.js.map +1 -1
  38. package/dist/types/Flows.types.d.ts +1 -0
  39. package/dist/types/Flows.types.d.ts.map +1 -1
  40. package/dist/types/Flows.types.js.map +1 -1
  41. package/dist/utils/schemaUtils.js +1 -1
  42. package/dist/utils/schemaUtils.js.map +1 -1
  43. package/docs/tooling/document-processing.md +235 -174
  44. package/package.json +2 -1
  45. package/src/cli/index.ts +2 -10
  46. package/src/cli/runCommand.ts +31 -25
  47. package/src/index.ts +2 -1
  48. package/src/internalTools/documentExtraction/documentExtraction.ts +184 -767
  49. package/src/internalTools/documentExtraction/extractStructuredData.ts +140 -0
  50. package/src/internalTools/documentExtraction/parseDocumentLocal.ts +660 -0
  51. package/src/internalTools/documentExtraction/parseDocumentManaged.ts +152 -0
  52. package/src/nodes/addAppToolNode.ts +30 -7
  53. package/src/toolsLibrary/parseDocument.ts +38 -206
  54. package/src/toolsLibrary/withBrowserSession.ts +89 -4
  55. package/src/types/Flows.types.ts +1 -0
  56. package/src/utils/schemaUtils.ts +1 -1
@@ -0,0 +1,547 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.parseDocumentWithLocalService = parseDocumentWithLocalService;
7
+ const os_1 = __importDefault(require("os"));
8
+ const path_1 = __importDefault(require("path"));
9
+ const promises_1 = __importDefault(require("fs/promises"));
10
+ const logger_1 = require("../../utils/logger");
11
/**
 * Parse a document from a local path or a URL without the managed service.
 *
 * Loads the bytes via getDocumentContent, then routes them to the image or
 * text pipeline depending on the detected file type.
 *
 * @param documentSource   File path, or URL when `isDocumentUrl` is truthy.
 * @param isDocumentUrl    Whether `documentSource` should be fetched over HTTP.
 * @param sessionId        Correlation id attached to every log entry.
 * @param llamaCloudApiKey Optional key forwarded to the processing pipelines.
 * @param useBase64        Forwarded to the image pipeline only.
 * @returns `{ rawContent, metadata: { fileSize, fileType, processingTime, contentLength } }`
 * @throws Error prefixed with "Document processing failed: " for any failure.
 */
async function parseDocumentWithLocalService({ documentSource, isDocumentUrl, sessionId, llamaCloudApiKey, useBase64, }) {
    logger_1.logger.info({
        msg: '[DocumentParser] Parsing document locally',
        sessionId,
        documentSource,
        sourceType: isDocumentUrl ? 'url' : 'path',
    });
    const startTime = Date.now();
    try {
        // Determine document source and content
        const { content, fileType, fileSize } = await getDocumentContent({ documentSource, isDocumentUrl, sessionId });
        // Only path-based sources have a file on disk that can be handed over directly.
        const filePath = isDocumentUrl ? undefined : documentSource;
        // Process document content based on type
        const processedContent = isImageFile(fileType)
            ? await processImageDocument({ content, llamaCloudApiKey, fileType, filePath, useBase64, sessionId })
            : await processTextDocument({ content, llamaCloudApiKey, fileType, filePath, sessionId });
        logger_1.logger.info({
            msg: '[DocumentParser] Document content processed',
            sessionId,
            fileType,
            contentLength: processedContent.length,
        });
        return {
            rawContent: processedContent,
            metadata: {
                fileSize,
                fileType,
                processingTime: Date.now() - startTime,
                contentLength: processedContent.length,
            },
        };
    }
    catch (err) {
        // Use the same `msg` key as every other log entry in this module so the
        // text is emitted as the log message rather than as a stray field.
        logger_1.logger.error({
            msg: '[DocumentParser] Document processing failed',
            sessionId,
            err,
        });
        throw new Error(`Document processing failed: ${err instanceof Error ? err.message : String(err)}`);
    }
}
69
/**
 * Resolve the raw document, choosing the loader that matches the source kind:
 * URL sources are fetched, everything else is read from disk.
 */
async function getDocumentContent({ documentSource, isDocumentUrl, sessionId, }) {
    const loadDocument = isDocumentUrl ? fetchDocumentFromUrl : loadDocumentFromFile;
    return loadDocument({ documentSource, sessionId });
}
76
/**
 * Download a document and work out its file type.
 *
 * The type is taken from the URL's extension when it has one, otherwise it is
 * sniffed from the first bytes of the downloaded content.
 *
 * @param documentSource URL passed to `fetch`.
 * @param sessionId      Correlation id attached to every log entry.
 * @returns `{ content, fileType, fileSize }` where `content` is a Buffer and
 *          `fileSize` is its byte length.
 * @throws Error when the response status is not OK.
 */
async function fetchDocumentFromUrl({ documentSource, sessionId }) {
    logger_1.logger.debug({
        msg: '[DocumentParser] Fetching document from URL',
        sessionId,
        documentSource,
    });
    const response = await fetch(documentSource);
    logger_1.logger.debug({
        msg: '[DocumentParser] Document fetched from URL',
        sessionId,
        documentSource,
        status: response.status,
        ok: response.ok,
    });
    if (!response.ok) {
        // statusText is empty for HTTP/2 responses, so always report the numeric
        // status; otherwise the message would end right after the colon.
        const reason = `${response.status} ${response.statusText || ''}`.trim();
        throw new Error(`Failed to fetch document from URL: ${reason}`);
    }
    const content = Buffer.from(await response.arrayBuffer());
    const fileType = inferFileTypeFromUrl(documentSource) || inferFileTypeFromBuffer(content);
    logger_1.logger.debug({
        msg: '[DocumentParser] Successfully fetched document from URL',
        sessionId,
        documentSource,
        contentSize: content.length,
        fileType,
    });
    return {
        content,
        fileType,
        fileSize: content.length,
    };
}
109
/**
 * Read a document from disk.
 *
 * @param documentSource Path of the file to read.
 * @param sessionId      Correlation id attached to every log entry.
 * @returns `{ content, fileType, fileSize }`; `fileType` is the lower-cased
 *          extension of the path and `fileSize` the byte length of the content.
 * @throws Error "Document not found: <path>" when the path is not accessible.
 */
async function loadDocumentFromFile({ documentSource, sessionId }) {
    logger_1.logger.debug({
        msg: '[DocumentParser] Loading document from file',
        sessionId,
        documentSource,
    });
    // Any access failure is reported as a missing document.
    await promises_1.default.access(documentSource).catch(() => {
        throw new Error(`Document not found: ${documentSource}`);
    });
    const fileBytes = await promises_1.default.readFile(documentSource);
    const extension = path_1.default.extname(documentSource).toLowerCase();
    logger_1.logger.debug({
        msg: '[DocumentParser] Document loaded from file',
        sessionId,
        documentSource,
        contentSize: fileBytes.length,
        fileType: extension,
    });
    return {
        content: fileBytes,
        fileType: extension,
        fileSize: fileBytes.length,
    };
}
137
/**
 * Process an image document.
 *
 * Order of attempts:
 *  1. With an API key and a file path, parse the file with LlamaCloud.
 *  2. With an API key but no path, write the bytes to a private temporary
 *     directory and parse that copy with LlamaCloud.
 *  3. Otherwise (or when parsing produced nothing) return the image as a
 *     base64 data URL when `useBase64` is set, or a textual placeholder.
 *
 * @param content          Image bytes (Buffer).
 * @param llamaCloudApiKey Optional LlamaCloud key; enables steps 1 and 2.
 * @param filePath         Path of the image on disk, if it has one.
 * @param fileType         Extension including the dot, e.g. ".png".
 * @param useBase64        Return a data URL instead of a placeholder in step 3.
 * @param sessionId        Correlation id attached to every log entry.
 * @returns Extracted text, a data URL, or a placeholder string.
 * @throws Error prefixed with "Failed to process image document: ".
 */
async function processImageDocument({ content, llamaCloudApiKey, filePath, fileType, useBase64, sessionId, }) {
    try {
        // First, try to use LlamaParser if available for text extraction
        if (filePath && llamaCloudApiKey) {
            logger_1.logger.debug({
                msg: '[DocumentParser] Calling parseWithLlamaCloud for image',
                sessionId,
                filePath,
            });
            const parsedContent = await parseWithLlamaCloud({ filePath, llamaCloudApiKey, sessionId });
            logger_1.logger.debug({
                msg: '[DocumentParser] parseWithLlamaCloud returned for image',
                sessionId,
                hasContent: !!parsedContent,
                contentLength: parsedContent == null ? undefined : parsedContent.length,
            });
            if (parsedContent) {
                return parsedContent;
            }
        }
        // If no file path, create a temporary file for LlamaCloud parsing
        if (!filePath && llamaCloudApiKey) {
            // A per-call directory from mkdtemp keeps concurrent calls that land in
            // the same millisecond from overwriting or deleting each other's file.
            let tempDir;
            try {
                tempDir = await promises_1.default.mkdtemp(path_1.default.join(os_1.default.tmpdir(), 'minded-doc-'));
                const tempFilePath = path_1.default.join(tempDir, `temp_${Date.now()}${fileType}`);
                logger_1.logger.debug({
                    msg: '[DocumentParser] Creating temp file for image',
                    sessionId,
                    tempFilePath,
                    contentSize: content.length,
                });
                await promises_1.default.writeFile(tempFilePath, content);
                logger_1.logger.debug({
                    msg: '[DocumentParser] Calling parseWithLlamaCloud for temp image',
                    sessionId,
                    tempFilePath,
                });
                const parsedContent = await parseWithLlamaCloud({ filePath: tempFilePath, llamaCloudApiKey, sessionId });
                logger_1.logger.debug({
                    msg: '[DocumentParser] parseWithLlamaCloud returned for temp image',
                    sessionId,
                    hasContent: !!parsedContent,
                    contentLength: parsedContent == null ? undefined : parsedContent.length,
                });
                if (parsedContent) {
                    return parsedContent;
                }
            }
            catch (err) {
                logger_1.logger.warn({
                    msg: '[DocumentParser] Failed to parse image with LlamaCloud',
                    sessionId,
                    err,
                });
            }
            finally {
                // Best-effort cleanup on every path; a cleanup failure must not
                // discard a result that was already parsed successfully.
                if (tempDir) {
                    await promises_1.default.rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
                }
            }
        }
        // NOTE(review): this message mentions Sharp, but nothing in this function
        // tries to load it; the branch is reached whenever LlamaCloud is not
        // configured or returned nothing. Text kept as-is for log compatibility.
        logger_1.logger.warn({
            msg: '[DocumentParser] Sharp module not available. Using original image without optimization.',
            sessionId,
            fileType,
            contentSize: content.length,
        });
        if (useBase64) {
            // Return original image as base64
            const base64Image = content.toString('base64');
            const mimeType = getMimeType(fileType);
            return `data:${mimeType};base64,${base64Image}`;
        }
        // Without text extraction and without base64 only a placeholder can be returned
        return `[IMAGE CONTENT - ${fileType.toUpperCase()} file. Size: ${content.length} bytes. Consider using LLAMA_CLOUD_API_KEY for text extraction or set useBase64: true]`;
    }
    catch (err) {
        throw new Error(`Failed to process image document: ${err instanceof Error ? err.message : String(err)}`);
    }
}
228
/**
 * Process a non-image document.
 *
 * Order of attempts:
 *  1. With an API key and a file path, parse the file with LlamaCloud.
 *  2. With an API key, no path and Buffer content, write the bytes to a
 *     private temporary directory and parse that copy with LlamaCloud.
 *  3. Return string content unchanged.
 *  4. Decode Buffer content as UTF-8 for plain-text formats
 *     (.txt, .md, .html, .htm, .xml, .csv).
 *
 * @param content          Document bytes (Buffer) or already-decoded string.
 * @param llamaCloudApiKey Optional LlamaCloud key; enables steps 1 and 2.
 * @param filePath         Path of the document on disk, if it has one.
 * @param fileType         Extension including the dot; ".txt" is assumed for
 *                         the temporary file name when it is empty.
 * @param sessionId        Correlation id attached to every log entry.
 * @returns The extracted text.
 * @throws Error "Unsupported document type ..." when no step applies.
 */
async function processTextDocument({ content, llamaCloudApiKey, filePath, fileType, sessionId, }) {
    // Try LlamaCloud parsing if we have a file path
    if (filePath && llamaCloudApiKey) {
        const parsedContent = await parseWithLlamaCloud({ filePath, llamaCloudApiKey, sessionId });
        if (parsedContent) {
            return parsedContent;
        }
    }
    // If no file path but we have content and LlamaCloud API key, create a temp file
    if (!filePath && llamaCloudApiKey && Buffer.isBuffer(content)) {
        // A per-call directory from mkdtemp keeps concurrent calls that land in
        // the same millisecond from overwriting or deleting each other's file.
        let tempDir;
        try {
            tempDir = await promises_1.default.mkdtemp(path_1.default.join(os_1.default.tmpdir(), 'minded-doc-'));
            const tempFilePath = path_1.default.join(tempDir, `temp_${Date.now()}${fileType || '.txt'}`);
            await promises_1.default.writeFile(tempFilePath, content);
            const parsedContent = await parseWithLlamaCloud({ filePath: tempFilePath, llamaCloudApiKey, sessionId });
            if (parsedContent) {
                return parsedContent;
            }
        }
        catch (err) {
            logger_1.logger.warn({
                msg: '[DocumentParser] Failed to parse text document with LlamaCloud',
                sessionId,
                err,
            });
        }
        finally {
            // Best-effort cleanup on every path; a cleanup failure must not
            // discard a result that was already parsed successfully.
            if (tempDir) {
                await promises_1.default.rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
            }
        }
    }
    // Fallback: already-decoded content needs no further work
    if (typeof content === 'string') {
        return content;
    }
    // Basic text extraction for simple formats
    if (['.txt', '.md', '.html', '.htm', '.xml', '.csv'].includes(fileType || '')) {
        return content.toString('utf-8');
    }
    // For unsupported binary formats without LlamaParser
    throw new Error(`Unsupported document type ${fileType}. Please provide LLAMA_CLOUD_API_KEY for advanced document processing.`);
}
279
/**
 * Fetch the markdown result of a finished job, aborting the request if no
 * response arrives within 20 seconds.
 */
async function fetchLlamaCloudResult(url, headers) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 20000); // 20 second timeout
    try {
        return await fetch(url, { method: 'GET', headers, signal: controller.signal });
    }
    catch (fetchError) {
        if (fetchError instanceof Error && fetchError.name === 'AbortError') {
            throw new Error('Timeout fetching results from LlamaCloud after 20 seconds');
        }
        throw fetchError;
    }
    finally {
        // Single place to disarm the timer, whichever way the request ended.
        clearTimeout(timeout);
    }
}
/**
 * Read a response body as text by draining its stream chunk by chunk,
 * falling back to `text()` when the response exposes no body stream.
 */
async function readLlamaCloudResponseText(response) {
    if (!response.body) {
        return response.text();
    }
    const reader = response.body.getReader();
    const chunks = [];
    let totalLength = 0;
    try {
        while (true) {
            const { done, value } = await reader.read();
            if (done)
                break;
            if (value) {
                chunks.push(value);
                totalLength += value.length;
            }
        }
    }
    finally {
        reader.releaseLock();
    }
    // Combine chunks before decoding so multi-byte characters split across
    // chunk boundaries are decoded correctly.
    const combined = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
        combined.set(chunk, offset);
        offset += chunk.length;
    }
    return new TextDecoder().decode(combined);
}
/**
 * Interpret the result body: a JSON object or JSON string is returned parsed,
 * anything else is treated as the markdown text itself.
 */
function decodeLlamaCloudResult(responseText) {
    let parsed;
    try {
        parsed = JSON.parse(responseText);
    }
    catch (_a) {
        // If it's not JSON, assume it's the markdown content directly
        return responseText;
    }
    // Markdown such as "2024", "true" or "null" is also valid JSON; keep the
    // raw text in that case instead of a number/boolean/null with no content.
    if (typeof parsed === 'string' || (typeof parsed === 'object' && parsed !== null)) {
        return parsed;
    }
    return responseText;
}
/**
 * Parse a document using the LlamaCloud REST API.
 *
 * Uploads the file, polls the job status every 2 seconds for at most 60
 * attempts, then downloads the markdown result.
 *
 * @param filePath         Path of the file to upload.
 * @param llamaCloudApiKey Bearer token sent with every request.
 * @param sessionId        Correlation id attached to every log entry.
 * @returns The markdown content, or `null` on any failure. This function does
 *          not throw; every error is logged as a warning and mapped to `null`.
 */
async function parseWithLlamaCloud({ filePath, llamaCloudApiKey, sessionId, }) {
    const headers = {
        Accept: 'application/json',
        Authorization: `Bearer ${llamaCloudApiKey}`,
    };
    try {
        // Step 1: Upload file and start parsing
        const fileContent = await promises_1.default.readFile(filePath);
        const fileName = path_1.default.basename(filePath);
        const mimeType = getMimeType(path_1.default.extname(filePath));
        const formData = new FormData();
        const blob = new Blob([new Uint8Array(fileContent)], { type: mimeType });
        formData.append('file', blob, fileName);
        formData.append('premium_mode', 'true');
        const uploadResponse = await fetch('https://api.cloud.llamaindex.ai/api/v1/parsing/upload', {
            method: 'POST',
            headers,
            body: formData,
        });
        if (!uploadResponse.ok) {
            const errorText = await uploadResponse.text();
            throw new Error(`Failed to upload file: ${uploadResponse.status} - ${errorText}`);
        }
        const uploadResult = await uploadResponse.json();
        const jobId = uploadResult.id || uploadResult.job_id;
        if (!jobId) {
            throw new Error('No job ID returned from upload');
        }
        logger_1.logger.info({
            msg: '[DocumentParser] File uploaded to LlamaCloud',
            sessionId,
            jobId,
            fileName,
        });
        // Step 2: Poll for job completion
        const maxAttempts = 60; // 60 attempts with 2 second delay = 2 minutes max
        const pollDelay = 2000; // 2 seconds
        for (let attempts = 0; attempts < maxAttempts; attempts++) {
            const statusResponse = await fetch(`https://api.cloud.llamaindex.ai/api/v1/parsing/job/${jobId}`, {
                method: 'GET',
                headers,
            });
            if (!statusResponse.ok) {
                throw new Error(`Failed to check job status: ${statusResponse.status}`);
            }
            const statusResult = await statusResponse.json();
            const status = statusResult.status || statusResult.job_status;
            if (status === 'FAILED' || status === 'ERROR' || status === 'failed') {
                throw new Error(`Parsing job failed: ${statusResult.error || 'Unknown error'}`);
            }
            if (status === 'SUCCESS' || status === 'COMPLETED' || status === 'completed') {
                // Step 3: Retrieve results in Markdown
                const resultResponse = await fetchLlamaCloudResult(`https://api.cloud.llamaindex.ai/api/v1/parsing/job/${jobId}/result/markdown`, headers);
                if (!resultResponse.ok) {
                    const errorText = await resultResponse.text();
                    throw new Error(`Failed to retrieve results: ${resultResponse.status} - ${errorText}`);
                }
                let resultData;
                try {
                    resultData = decodeLlamaCloudResult(await readLlamaCloudResponseText(resultResponse));
                }
                catch (textError) {
                    logger_1.logger.error({
                        msg: '[DocumentParser] Failed to read response text',
                        sessionId,
                        jobId,
                        error: textError instanceof Error ? textError.message : String(textError),
                        stack: textError instanceof Error ? textError.stack : undefined,
                    });
                    throw new Error('Failed to read response from LlamaCloud');
                }
                // Guard against null before using `in`, which throws on non-objects.
                const isObject = typeof resultData === 'object' && resultData !== null;
                logger_1.logger.debug({
                    msg: '[DocumentParser] Result data structure',
                    sessionId,
                    jobId,
                    dataType: typeof resultData,
                    keys: isObject ? Object.keys(resultData) : [],
                    hasMarkdown: isObject && 'markdown' in resultData,
                    hasContent: isObject && 'content' in resultData,
                    hasText: isObject && 'text' in resultData,
                });
                // The API might return the markdown directly as a string or nested in an object
                const markdownContent = typeof resultData === 'string'
                    ? resultData
                    : resultData.markdown || resultData.content || resultData.text || '';
                if (!markdownContent) {
                    logger_1.logger.error({
                        msg: '[DocumentParser] No content in result',
                        sessionId,
                        jobId,
                        resultData: JSON.stringify(resultData).substring(0, 500),
                    });
                    throw new Error('No content returned from parsing');
                }
                logger_1.logger.info({
                    msg: '[DocumentParser] Successfully parsed document with LlamaCloud',
                    sessionId,
                    jobId,
                    contentLength: markdownContent.length,
                    preview: markdownContent.substring(0, 100),
                });
                logger_1.logger.debug({
                    msg: '[DocumentParser] About to return markdown content',
                    sessionId,
                    jobId,
                });
                return markdownContent;
            }
            // Job still running: wait before next attempt
            await new Promise((resolve) => setTimeout(resolve, pollDelay));
        }
        throw new Error('Parsing job timed out after 2 minutes');
    }
    catch (err) {
        // Use the same `msg` key as the other log entries in this module.
        logger_1.logger.warn({
            msg: '[DocumentParser] LlamaCloud parsing failed',
            sessionId,
            err,
        });
        return null;
    }
    finally {
        logger_1.logger.debug({
            msg: '[DocumentParser] parseWithLlamaCloud finished',
            sessionId,
            filePath,
        });
    }
}
479
/**
 * Infer a file type from the leading bytes (magic number) of a buffer.
 *
 * Recognizes PNG, JPEG, GIF, PDF, ZIP-based containers, legacy Office
 * compound files, TIFF (both byte orders) and WebP.
 *
 * @param buffer Buffer holding at least the start of the file.
 * @returns An extension including the dot, or '.unknown' when nothing matches
 *          (including for an empty buffer).
 */
function inferFileTypeFromBuffer(buffer) {
    // Check common file signatures (upper-case hex of the first bytes)
    const signatures = [
        ['89504E47', '.png'],
        ['FFD8FF', '.jpg'],
        ['47494638', '.gif'],
        ['25504446', '.pdf'],
        ['504B0304', '.zip'], // Also used by docx, xlsx, pptx
        ['D0CF11E0', '.doc'], // Also xls, ppt
        ['49492A00', '.tiff'], // little-endian TIFF ("II*\0")
        ['4D4D002A', '.tiff'], // big-endian TIFF ("MM\0*")
    ];
    const hex = buffer.toString('hex', 0, 4).toUpperCase();
    // WebP is a RIFF container; "RIFF" alone also matches WAV/AVI, so the
    // "WEBP" form type at offset 8 must be checked as well.
    if (hex === '52494646' && buffer.length >= 12 && buffer.toString('latin1', 8, 12) === 'WEBP') {
        return '.webp';
    }
    for (const [signature, type] of signatures) {
        if (hex.startsWith(signature)) {
            return type;
        }
    }
    return '.unknown';
}
500
/**
 * Get the MIME type for a file extension.
 *
 * @param fileExtension Extension including the leading dot; case-insensitive.
 * @returns The matching MIME type, or 'application/octet-stream' for any
 *          extension that is not in the table.
 */
function getMimeType(fileExtension) {
    const mimeTypes = {
        '.pdf': 'application/pdf',
        '.doc': 'application/msword',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.txt': 'text/plain',
        '.rtf': 'application/rtf',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
        '.tiff': 'image/tiff',
        '.xls': 'application/vnd.ms-excel',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.csv': 'text/csv',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.xml': 'application/xml',
        '.md': 'text/markdown',
    };
    const key = fileExtension.toLowerCase();
    // Only consult the table's own entries: a bare property read would also
    // resolve inherited names such as "constructor" or "__proto__" and hand
    // back a function/object instead of a MIME string.
    if (Object.prototype.hasOwnProperty.call(mimeTypes, key)) {
        return mimeTypes[key];
    }
    return 'application/octet-stream';
}
527
/**
 * Derive a file type from the extension of a URL's path component.
 *
 * @param url Absolute URL string.
 * @returns The lower-cased extension including the dot, or null when the path
 *          has no extension or the URL cannot be parsed.
 */
function inferFileTypeFromUrl(url) {
    try {
        const { pathname } = new URL(url);
        const ext = path_1.default.extname(pathname).toLowerCase();
        if (ext) {
            return ext;
        }
        return null;
    }
    catch (_err) {
        // Unparseable URL: no type can be inferred.
        return null;
    }
}
540
/**
 * Tell whether an extension (with leading dot, any case) denotes one of the
 * image formats handled by the image pipeline.
 */
function isImageFile(fileType) {
    switch (fileType.toLowerCase()) {
        case '.jpg':
        case '.jpeg':
        case '.png':
        case '.gif':
        case '.bmp':
        case '.webp':
        case '.tiff':
            return true;
        default:
            return false;
    }
}
547
+ //# sourceMappingURL=parseDocumentLocal.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parseDocumentLocal.js","sourceRoot":"","sources":["../../../src/internalTools/documentExtraction/parseDocumentLocal.ts"],"names":[],"mappings":";;;;;AAKA,sEA0EC;AA/ED,4CAAoB;AACpB,gDAAwB;AACxB,2DAA8B;AAC9B,+CAA4C;AAErC,KAAK,UAAU,6BAA6B,CAAC,EAClD,cAAc,EACd,aAAa,EACb,SAAS,EACT,gBAAgB,EAChB,SAAS,GAOV;IACC,eAAM,CAAC,IAAI,CAAC;QACV,GAAG,EAAE,2CAA2C;QAChD,SAAS;QACT,cAAc;QACd,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM;KAC3C,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,CAAC;QACH,wCAAwC;QACxC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,MAAM,kBAAkB,CAAC,EAAE,cAAc,EAAE,aAAa,EAAE,SAAS,EAAE,CAAC,CAAC;QAE/G,yCAAyC;QACzC,IAAI,gBAAwB,CAAC;QAE7B,IAAI,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,gBAAgB,GAAG,MAAM,oBAAoB,CAAC;gBAC5C,OAAO;gBACP,gBAAgB;gBAChB,QAAQ;gBACR,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc;gBACpD,SAAS;gBACT,SAAS;aACV,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,gBAAgB,GAAG,MAAM,mBAAmB,CAAC;gBAC3C,OAAO;gBACP,gBAAgB;gBAChB,QAAQ;gBACR,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc;gBACpD,SAAS;aACV,CAAC,CAAC;QACL,CAAC;QAED,eAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,6CAA6C;YAClD,SAAS;YACT,QAAQ;YACR,aAAa,EAAE,gBAAgB,CAAC,MAAM;SACvC,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE9C,OAAO;YACL,UAAU,EAAE,gBAAgB;YAC5B,QAAQ,EAAE;gBACR,QAAQ;gBACR,QAAQ;gBACR,cAAc;gBACd,aAAa,EAAE,gBAAgB,CAAC,MAAM;aACvC;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,KAAK,CAAC;YACX,OAAO,EAAE,6CAA6C;YACtD,SAAS;YACT,GAAG;SACJ,CAAC,CAAC;QACH,MAAM,IAAI,KAAK,CAAC,+BAA+B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACrG,CAAC;AACH,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,EAChC,cAAc,EACd,aAAa,EACb,SAAS,GAKV;IAKC,6BAA6B;IAC7B,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,oBAAoB,CAAC,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,oBAAoB,CAAC,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,EAAE,cAAc,EAAE,SAAS,EAAiD;IAK9G,eAAM,CAAC,KAAK,CAAC;QACX,GAAG,EAAE,6CAA6C;QAC
lD,SAAS;QACT,cAAc;KACf,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,CAAC;IAE7C,eAAM,CAAC,KAAK,CAAC;QACX,GAAG,EAAE,4CAA4C;QACjD,SAAS;QACT,cAAc;QACd,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,EAAE,EAAE,QAAQ,CAAC,EAAE;KAChB,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;IACjD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,cAAc,CAAC,IAAI,uBAAuB,CAAC,OAAO,CAAC,CAAC;IAE1F,eAAM,CAAC,KAAK,CAAC;QACX,GAAG,EAAE,yDAAyD;QAC9D,SAAS;QACT,cAAc;QACd,WAAW,EAAE,OAAO,CAAC,MAAM;QAC3B,QAAQ;KACT,CAAC,CAAC;IAEH,OAAO;QACL,OAAO;QACP,QAAQ;QACR,QAAQ,EAAE,OAAO,CAAC,MAAM;KACzB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,EAAE,cAAc,EAAE,SAAS,EAAiD;IAK9G,eAAM,CAAC,KAAK,CAAC;QACX,GAAG,EAAE,6CAA6C;QAClD,SAAS;QACT,cAAc;KACf,CAAC,CAAC;IAEH,sCAAsC;IACtC,IAAI,CAAC;QACH,MAAM,kBAAG,CAAC,MAAM,CAAC,cAAc,CAAC,CAAA;IAClC,CAAC;IAAC,WAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,uBAAuB,cAAc,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,kBAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,cAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,WAAW,EAAE,CAAC;IAE5D,eAAM,CAAC,KAAK,CAAC;QACX,GAAG,EAAE,4CAA4C;QACjD,SAAS;QACT,cAAc;QACd,WAAW,EAAE,OAAO,CAAC,MAAM;QAC3B,QAAQ;KACT,CAAC,CAAC;IAEH,OAAO;QACL,OAAO;QACP,QAAQ;QACR,QAAQ,EAAE,OAAO,CAAC,MAAM;KACzB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CAAC,EAClC,OAAO,EACP,gBAAgB,EAChB,QAAQ,EACR,QAAQ,EACR,SAAS,EACT,SAAS,GAQV;IACC,IAAI,CAAC;QACH,iEAAiE;QACjE,IAAI,QAAQ,IAAI,gBAAgB,EAAE,CAAC;YACjC,eAAM,CAAC,KAAK,CAAC;gBACX,GAAG,EAAE,wDAAwD;gBAC7D,SAAS;gBACT,QAAQ;aACT,CAAC,CAAC;YACH,MAAM,aAAa,GAAG,MAAM,mBAAmB,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,CAAC,CAAC;YAC3F,eAAM,CAAC,KAAK,CAAC;gBACX,GAAG,EAAE,yDAAyD;gBAC9D,SAAS;gBACT,UAAU,EAAE,CAAC,CAAC,aAAa;gBAC3B,aAAa,EAAE,aAAa,aAAb,aAAa,uBAAb,aAAa,CAAE,MAAM;aACrC,CAAC,CAAC;YACH,IAAI,aAAa,EAAE,CAAC;gBAClB,OAAO,aAAa,CAAC;YACvB,CAAC;QACH,CAAC;QAED,kEAAkE;QAClE,IAAI,CAAC,QAAQ,IAAI,gBAAgB,EAAE,CAAC;YAClC
,MAAM,OAAO,GAAG,YAAE,CAAC,MAAM,EAAE,CAAC;YAC5B,MAAM,YAAY,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;YACrD,MAAM,YAAY,GAAG,cAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YAEtD,eAAM,CAAC,KAAK,CAAC;gBACX,GAAG,EAAE,+CAA+C;gBACpD,SAAS;gBACT,YAAY;gBACZ,WAAW,EAAE,OAAO,CAAC,MAAM;aAC5B,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,kBAAG,CAAC,SAAS,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;gBAC3C,eAAM,CAAC,KAAK,CAAC;oBACX,GAAG,EAAE,6DAA6D;oBAClE,SAAS;oBACT,YAAY;iBACb,CAAC,CAAC;gBACH,MAAM,aAAa,GAAG,MAAM,mBAAmB,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,gBAAgB,EAAE,SAAS,EAAE,CAAC,CAAC;gBACzG,eAAM,CAAC,KAAK,CAAC;oBACX,GAAG,EAAE,8DAA8D;oBACnE,SAAS;oBACT,UAAU,EAAE,CAAC,CAAC,aAAa;oBAC3B,aAAa,EAAE,aAAa,aAAb,aAAa,uBAAb,aAAa,CAAE,MAAM;iBACrC,CAAC,CAAC;gBACH,MAAM,kBAAG,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;gBAE/B,IAAI,aAAa,EAAE,CAAC;oBAClB,OAAO,aAAa,CAAC;gBACvB,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,8BAA8B;gBAC9B,IAAI,CAAC;oBACH,MAAM,kBAAG,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;oBAC/B,MAAM,kBAAG,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;gBACjC,CAAC;gBAAC,WAAM,CAAC;oBACP,OAAO;gBACT,CAAC;gBACD,eAAM,CAAC,IAAI,CAAC;oBACV,GAAG,EAAE,wDAAwD;oBAC7D,SAAS;oBACT,GAAG;iBACJ,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,eAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,yFAAyF;YAC9F,SAAS;YACT,QAAQ;YACR,WAAW,EAAE,OAAO,CAAC,MAAM;SAC5B,CAAC,CAAC;QAEH,oDAAoD;QACpD,IAAI,SAAS,EAAE,CAAC;YACd,kCAAkC;YAClC,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC/C,MAAM,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;YACvC,OAAO,QAAQ,QAAQ,WAAW,WAAW,EAAE,CAAC;QAClD,CAAC;aAAM,CAAC;YACN,gEAAgE;YAChE,OAAO,oBAAoB,QAAQ,CAAC,WAAW,EAAE,gBAC/C,OAAO,CAAC,MACV,wFAAwF,CAAC;QAC3F,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,qCAAqC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC3G,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,mBAAmB,CAAC,EACjC,OAAO,EACP,gBAAgB,EAChB,QAAQ,EACR,QAAQ,EACR,SAAS,GAOV;IACC,gDAAgD;IAChD,IAAI,QAAQ,IAAI,gBAAgB,EAAE,CAAC;QACjC,MAAM,aAAa,GAAG,MAAM,mBAAmB,CAAC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,CAAC,CAAC;QAC3F,IAAI,aAAa,EAAE,CAAC
;YAClB,OAAO,aAAa,CAAC;QACvB,CAAC;IACH,CAAC;IAED,iFAAiF;IACjF,IAAI,CAAC,QAAQ,IAAI,gBAAgB,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAC9D,MAAM,OAAO,GAAG,YAAE,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,IAAI,MAAM,EAAE,CAAC;QAC/D,MAAM,YAAY,GAAG,cAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;QAEtD,IAAI,CAAC;YACH,MAAM,kBAAG,CAAC,SAAS,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YAC3C,MAAM,aAAa,GAAG,MAAM,mBAAmB,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,gBAAgB,EAAE,SAAS,EAAE,CAAC,CAAC;YACzG,MAAM,kBAAG,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YAE/B,IAAI,aAAa,EAAE,CAAC;gBAClB,OAAO,aAAa,CAAC;YACvB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,8BAA8B;YAC9B,IAAI,CAAC;gBACH,MAAM,kBAAG,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;gBAC/B,MAAM,kBAAG,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;YACjC,CAAC;YAAC,WAAM,CAAC;gBACP,OAAO;YACT,CAAC;YACD,eAAM,CAAC,IAAI,CAAC;gBACV,GAAG,EAAE,gEAAgE;gBACrE,SAAS;gBACT,GAAG;aACJ,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,wCAAwC;IACxC,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QAChC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,2CAA2C;IAC3C,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,QAAQ,IAAI,EAAE,CAAC,EAAE,CAAC;QAC9E,OAAO,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,qDAAqD;IACrD,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,wEAAwE,CAAC,CAAC;AACjI,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,mBAAmB,CAAC,EACjC,QAAQ,EACR,gBAAgB,EAChB,SAAS,GAKV;IACC,IAAI,CAAC;QACH,wCAAwC;QACxC,MAAM,WAAW,GAAG,MAAM,kBAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,cAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,WAAW,CAAC,cAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QAErD,MAAM,QAAQ,GAAG,IAAI,QAAQ,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;QACzE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;QACxC,QAAQ,CAAC,MAAM,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QAExC,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,uDAAuD,EAAE;YAC1F,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,MAAM,EAAE,kBAAkB;gBAC1B,aAAa,EAAE,UAAU,gBAAgB,EAAE;aAC5C;YACD,IAAI,EAAE,QAAQ;SACf,CAAC,CAAC;QAE
H,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YAC9C,MAAM,IAAI,KAAK,CAAC,0BAA0B,cAAc,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;QACpF,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,YAAY,CAAC,EAAE,IAAI,YAAY,CAAC,MAAM,CAAC;QAErD,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;QAED,eAAM,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,8CAA8C;YACnD,SAAS;YACT,KAAK;YACL,QAAQ;SACT,CAAC,CAAC;QAEH,kCAAkC;QAClC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,MAAM,WAAW,GAAG,EAAE,CAAC,CAAC,kDAAkD;QAC1E,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,YAAY;QAEpC,OAAO,QAAQ,GAAG,WAAW,EAAE,CAAC;YAC9B,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,sDAAsD,KAAK,EAAE,EAAE;gBAChG,MAAM,EAAE,KAAK;gBACb,OAAO,EAAE;oBACP,MAAM,EAAE,kBAAkB;oBAC1B,aAAa,EAAE,UAAU,gBAAgB,EAAE;iBAC5C;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,+BAA+B,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC;YAC1E,CAAC;YAED,MAAM,YAAY,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACjD,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,IAAI,YAAY,CAAC,UAAU,CAAC;YAE9D,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,WAAW,IAAI,MAAM,KAAK,WAAW,EAAE,CAAC;gBAC7E,uCAAuC;gBAEvC,wCAAwC;gBACxC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,oBAAoB;gBAEjF,IAAI,cAAc,CAAC;gBACnB,IAAI,CAAC;oBACH,cAAc,GAAG,MAAM,KAAK,CAAC,sDAAsD,KAAK,kBAAkB,EAAE;wBAC1G,MAAM,EAAE,KAAK;wBACb,OAAO,EAAE;4BACP,MAAM,EAAE,kBAAkB;4BAC1B,aAAa,EAAE,UAAU,gBAAgB,EAAE;yBAC5C;wBACD,MAAM,EAAE,UAAU,CAAC,MAAM;qBAC1B,CAAC,CAAC;gBACL,CAAC;gBAAC,OAAO,UAAU,EAAE,CAAC;oBACpB,YAAY,CAAC,OAAO,CAAC,CAAC;oBACtB,IAAI,UAAU,YAAY,KAAK,IAAI,UAAU,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;wBACpE,MAAM,IAAI,KAAK,CAAC,2DAA2D,CAAC,CAAC;oBAC/E,CAAC;oBACD,MAAM,UAAU,CAAC;gBACnB,CAAC;gBAED,YAAY,CAAC,OAAO,CAAC,CAAC;gBAEtB,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;oBACvB,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;oBAC9C,MAAM,IAAI,KAAK,CAAC,+BAA+B,cAAc,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAC;gBACzF,CAAC;gBAED,IAAI,UAAe,CAAC;gBACpB,IAAI,CAAC;oBACH,wEAAwE;
oBACxE,IAAI,YAAY,CAAC;oBACjB,IAAI,cAAc,CAAC,IAAI,EAAE,CAAC;wBACxB,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;wBAC/C,MAAM,MAAM,GAAiB,EAAE,CAAC;wBAChC,IAAI,WAAW,GAAG,CAAC,CAAC;wBAEpB,IAAI,CAAC;4BACH,OAAO,IAAI,EAAE,CAAC;gCACZ,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gCAC5C,IAAI,IAAI;oCAAE,MAAM;gCAChB,IAAI,KAAK,EAAE,CAAC;oCACV,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oCACnB,WAAW,IAAI,KAAK,CAAC,MAAM,CAAC;gCAC9B,CAAC;4BACH,CAAC;4BAED,iBAAiB;4BACjB,MAAM,QAAQ,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;4BAC7C,IAAI,MAAM,GAAG,CAAC,CAAC;4BACf,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gCAC3B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;gCAC5B,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC;4BACzB,CAAC;4BAED,YAAY,GAAG,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;wBACpD,CAAC;gCAAS,CAAC;4BACT,MAAM,CAAC,WAAW,EAAE,CAAC;wBACvB,CAAC;oBACH,CAAC;yBAAM,CAAC;wBACN,YAAY,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;oBAC7C,CAAC;oBAED,+DAA+D;oBAC/D,IAAI,CAAC;wBACH,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;oBACxC,CAAC;oBAAC,WAAM,CAAC;wBACP,8DAA8D;wBAC9D,UAAU,GAAG,YAAY,CAAC;oBAC5B,CAAC;gBACH,CAAC;gBAAC,OAAO,SAAS,EAAE,CAAC;oBACnB,eAAM,CAAC,KAAK,CAAC;wBACX,GAAG,EAAE,+CAA+C;wBACpD,SAAS;wBACT,KAAK;wBACL,KAAK,EAAE,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC;wBACzE,KAAK,EAAE,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;qBAChE,CAAC,CAAC;oBACH,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;gBAC7D,CAAC;gBAED,eAAM,CAAC,KAAK,CAAC;oBACX,GAAG,EAAE,wCAAwC;oBAC7C,SAAS;oBACT,KAAK;oBACL,QAAQ,EAAE,OAAO,UAAU;oBAC3B,IAAI,EAAE,OAAO,UAAU,KAAK,QAAQ,IAAI,UAAU,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE;oBAC1F,WAAW,EAAE,OAAO,UAAU,KAAK,QAAQ,IAAI,UAAU,IAAI,UAAU;oBACvE,UAAU,EAAE,OAAO,UAAU,KAAK,QAAQ,IAAI,SAAS,IAAI,UAAU;oBACrE,OAAO,EAAE,OAAO,UAAU,KAAK,QAAQ,IAAI,MAAM,IAAI,UAAU;iBAChE,CAAC,CAAC;gBAEH,gFAAgF;gBAChF,IAAI,eAAuB,CAAC;gBAC5B,IAAI,OAAO,UAAU,KAAK,QAAQ,EAAE,CAAC;oBACnC,eAAe,GAAG,UAAU,CAAC;gBAC/B,CAAC;qBAAM,CAAC;oBACN,eAAe,GAAG,UAAU,CAAC,QAAQ,IAAI,UAAU,CAAC,
OAAO,IAAI,UAAU,CAAC,IAAI,IAAI,EAAE,CAAC;gBACvF,CAAC;gBAED,IAAI,CAAC,eAAe,EAAE,CAAC;oBACrB,eAAM,CAAC,KAAK,CAAC;wBACX,GAAG,EAAE,uCAAuC;wBAC5C,SAAS;wBACT,KAAK;wBACL,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;qBACzD,CAAC,CAAC;oBACH,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;gBACtD,CAAC;gBAED,eAAM,CAAC,IAAI,CAAC;oBACV,GAAG,EAAE,+DAA+D;oBACpE,SAAS;oBACT,KAAK;oBACL,aAAa,EAAE,eAAe,CAAC,MAAM;oBACrC,OAAO,EAAE,eAAe,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;iBAC3C,CAAC,CAAC;gBAEH,eAAM,CAAC,KAAK,CAAC;oBACX,GAAG,EAAE,mDAAmD;oBACxD,SAAS;oBACT,KAAK;iBACN,CAAC,CAAC;gBAEH,OAAO,eAAe,CAAC;YACzB,CAAC;iBAAM,IAAI,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,OAAO,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAC5E,MAAM,IAAI,KAAK,CAAC,uBAAuB,YAAY,CAAC,KAAK,IAAI,eAAe,EAAE,CAAC,CAAC;YAClF,CAAC;YAED,2BAA2B;YAC3B,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;YAC/D,QAAQ,EAAE,CAAC;QACb,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,eAAM,CAAC,IAAI,CAAC;YACV,OAAO,EAAE,4CAA4C;YACrD,SAAS;YACT,GAAG;SACJ,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;YAAS,CAAC;QACT,eAAM,CAAC,KAAK,CAAC;YACX,GAAG,EAAE,+CAA+C;YACpD,SAAS;YACT,QAAQ;SACT,CAAC,CAAC;IACL,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,MAAc;IAC7C,+BAA+B;IAC/B,MAAM,UAAU,GAA8B;QAC5C,UAAU,EAAE,MAAM;QAClB,MAAM,EAAE,MAAM;QACd,UAAU,EAAE,MAAM;QAClB,UAAU,EAAE,MAAM;QAClB,UAAU,EAAE,MAAM,EAAE,gCAAgC;QACpD,QAAQ,EAAE,MAAM,EAAE,gBAAgB;KACnC,CAAC;IAEF,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IAEvD,KAAK,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3D,IAAI,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,aAAqB;IACxC,MAAM,SAAS,GAA8B;QAC3C,MAAM,EAAE,iBAAiB;QACzB,MAAM,EAAE,oBAAoB;QAC5B,OAAO,EAAE,yEAAyE;QAClF,MAAM,EAAE,YAAY;QACpB,MAAM,EAAE,iBAAiB;QACzB,MAAM,EAAE,YAAY;QACpB,OAAO,EAAE,YAAY;QACrB,MAAM,EAAE,WAAW;QACnB,MAAM,EAAE,WAAW;QACnB,M
AAM,EAAE,WAAW;QACnB,OAAO,EAAE,YAAY;QACrB,OAAO,EAAE,YAAY;QACrB,MAAM,EAAE,0BAA0B;QAClC,OAAO,EAAE,mEAAmE;QAC5E,MAAM,EAAE,UAAU;QAClB,OAAO,EAAE,WAAW;QACpB,MAAM,EAAE,WAAW;QACnB,MAAM,EAAE,iBAAiB;QACzB,KAAK,EAAE,eAAe;KACvB,CAAC;IAEF,OAAO,SAAS,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,IAAI,0BAA0B,CAAC;AAC9E,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QACvC,MAAM,SAAS,GAAG,cAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QACvD,OAAO,SAAS,IAAI,IAAI,CAAC;IAC3B,CAAC;IAAC,WAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,MAAM,UAAU,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/E,OAAO,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;AACrD,CAAC"}
@@ -0,0 +1,13 @@
1
+ import { DocumentProcessResponse } from '../../platform/mindedConnectionTypes';
2
+ /**
3
+ * Process document using managed backend service
4
+ */
5
+ export declare function parseDocumentWithManagedService({ documentSource, isDocumentUrl, sessionId, }: {
6
+ isDocumentUrl: boolean;
7
+ documentSource: string;
8
+ sessionId: string;
9
+ }): Promise<{
10
+ rawContent: string;
11
+ metadata?: DocumentProcessResponse['metadata'];
12
+ }>;
13
+ //# sourceMappingURL=parseDocumentManaged.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parseDocumentManaged.d.ts","sourceRoot":"","sources":["../../../src/internalTools/documentExtraction/parseDocumentManaged.ts"],"names":[],"mappings":"AAEA,OAAO,EAEL,uBAAuB,EAKxB,MAAM,sCAAsC,CAAC;AAI9C;;GAEG;AACH,wBAAsB,+BAA+B,CAAC,EACpD,cAAc,EACd,aAAa,EACb,SAAS,GACV,EAAE;IACD,aAAa,EAAE,OAAO,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,uBAAuB,CAAC,UAAU,CAAC,CAAA;CAAE,CAAC,CA6DlF"}