@dcyfr/ai-rag 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2026 DCYFR
3
+ Copyright (c) 2025-2026 DCYFR Labs (https://www.dcyfr.ai)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/README.md CHANGED
@@ -1,13 +1,135 @@
1
1
  # @dcyfr/ai-rag
2
2
 
3
+ [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/dcyfr/dcyfr-ai-rag)
4
+
3
5
  > **RAG (Retrieval-Augmented Generation) framework for Node.js and TypeScript**
4
6
 
5
7
  Build production-ready RAG systems with document loading, embedding, vector stores, and semantic search.
6
8
 
7
9
  [![npm version](https://img.shields.io/npm/v/@dcyfr/ai-rag.svg)](https://www.npmjs.com/package/@dcyfr/ai-rag)
8
- [![TypeScript](https://img.shields.io/badge/TypeScript-5.3+-blue.svg)](https://www.typescriptlang.org/)
10
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.7+-blue.svg)](https://www.typescriptlang.org/)
9
11
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
10
12
 
13
+ ---
14
+ - [Document Conversion](#-document-conversion-markitdown)
15
+ - **Document Conversion** - Convert 15+ file formats (PDF, DOCX, PPTX, XLSX, images, etc.) to Markdown via MarkItDown Python library
16
+ ---
17
+
18
+ ## 📄 Document Conversion (MarkItDown)
19
+
20
+ Convert diverse document formats to LLM-optimized Markdown:
21
+
22
+ ```typescript
23
+ import { convertToMarkdown, convertBatch } from '@dcyfr/ai-rag/ingestion';
24
+
25
+ // Single document conversion
26
+ const result = await convertToMarkdown('/path/to/document.pdf', {
27
+ timeout: 45000, // 45 seconds
28
+ maxFileSize: 50 * 1024 * 1024, // 50 MB
29
+ enableLLMDescriptions: true, // Use GPT-4 Vision for image descriptions
30
+ });
31
+
32
+ console.log(result.markdown); // Converted markdown content
33
+ console.log(result.metadata); // File size, duration, page count, etc.
34
+
35
+ // Batch conversion (parallel, concurrency-controlled)
36
+ const files = ['/docs/report.pdf', '/slides/deck.pptx', '/data/sheet.xlsx'];
37
+ const results = await convertBatch(files, { timeout: 60000 });
38
+
39
+ results.forEach((r, i) => {
40
+ if (r.success) {
41
+ console.log(`✅ ${files[i]}: ${r.markdown.length} chars`);
42
+ } else {
43
+ console.error(`❌ ${files[i]}: ${r.error}`);
44
+ }
45
+ });
46
+ ```
47
+
48
+ **Supported Formats:**
49
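+ - **Documents:** PDF, DOCX, PPTX, XLSX, CSV (note: plain `.txt` and `.md` inputs are currently rejected with `UNSUPPORTED_FORMAT` by the bridge's extension check)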
50
+ - **Web:** HTML, XML, JSON
51
+ - **Images:** PNG, JPG, JPEG, GIF, WEBP (with optional LLM-powered OCR)
52
+ - **Audio:** MP3, WAV, M4A (transcription)
53
+ - **Archives:** EPUB, ZIP
54
+
55
+ **Installation:**
56
+ ```bash
57
+ # Python environment required (workspace already configured)
58
+ pip install 'markitdown>=0.1.5'
59
+
60
+ # Or use workspace .venv (pre-configured)
61
+ source /path/to/workspace/.venv/bin/activate
62
+ ```
63
+
64
+ **Performance:**
65
+ - **Latency:** 200-500ms per document (PDF/Office), <100ms (text/HTML)
66
+ - **Concurrency:** Up to 3 parallel conversions per batch in `convertBatch` (fixed limit; not configurable via `ConversionOptions`)
67
+ - **Memory:** ~50-200 MB per conversion (temp files auto-cleaned)
68
+
69
+ **Error Handling:**
70
+ ```typescript
71
+ import { convertToMarkdown, ConversionError, ConversionErrorType } from '@dcyfr/ai-rag/ingestion';
72
+
73
+ try {
74
+ const result = await convertToMarkdown('/path/to/file.pdf');
75
+ } catch (error) {
76
+ if (error instanceof ConversionError) {
77
+ switch (error.type) {
78
+ case ConversionErrorType.TIMEOUT:
79
+ console.error('Conversion timed out - file too large?');
80
+ break;
81
+ case ConversionErrorType.FILE_TOO_LARGE:
82
+ console.error(`File exceeds ${error.details?.maxFileSize} bytes`);
83
+ break;
84
+ case ConversionErrorType.UNSUPPORTED_FORMAT:
85
+ console.error('File format not supported by MarkItDown');
86
+ break;
87
+ default:
88
+ console.error(`Conversion failed: ${error.message}`);
89
+ }
90
+ }
91
+ }
92
+ ```
93
+
94
+ **LLM Integration (Optional):**
95
+ ```typescript
96
+ // Enable GPT-4 Vision or Claude for image descriptions
97
+ const result = await convertToMarkdown('/path/to/presentation.pptx', {
98
+ enableLLMDescriptions: true,
99
+ llmModel: 'gpt-4-vision-preview', // or 'claude-3-opus-20240229'
100
+ });
101
+
102
+ // Requires environment variables:
103
+ // OPENAI_API_KEY=sk-...
104
+ // ANTHROPIC_API_KEY=sk-ant-...
105
+ ```
106
+
107
+
108
+ ## ⚡ 30-Second Quick Start
109
+
110
+ ```bash
111
+ # Install package
112
+ npm install @dcyfr/ai-rag
113
+
114
+ # Basic usage
115
+ import { TextLoader, InMemoryVectorStore } from '@dcyfr/ai-rag';
116
+
117
+ const loader = new TextLoader();
118
+ const store = new InMemoryVectorStore();
119
+ # ✅ RAG system ready for document ingestion
120
+ ```
121
+
122
+ ---
123
+
124
+ ## 🧭 Related Packages
125
+
126
+ | Package | Purpose | Type |
127
+ |---------|---------|------|
128
+ | [@dcyfr/ai](../dcyfr-ai) | Core AI harness | npm package |
129
+ | [@dcyfr/ai-agents](../dcyfr-ai-agents) | Autonomous agents | Template |
130
+ | [@dcyfr/ai-chatbot](../dcyfr-ai-chatbot) | Chatbot template | Template |
131
+ | [dcyfr-labs](../dcyfr-labs) | Production Next.js app | Application |
132
+
11
133
  ---
12
134
 
13
135
  ## ✨ Features
@@ -57,7 +179,7 @@ import {
57
179
  // 1. Setup components
58
180
  const loader = new TextLoader();
59
181
  const embedder = new SimpleEmbeddingGenerator({ dimensions: 384 });
60
- const store = new InMemory VectorStore({
182
+ const store = new InMemoryVectorStore({
61
183
  collectionName: 'my-docs',
62
184
  embeddingDimensions: 384,
63
185
  });
package/dist/index.d.ts CHANGED
@@ -5,4 +5,5 @@ export type * from './types/index.js';
5
5
  export * from './loaders/index.js';
6
6
  export * from './stores/index.js';
7
7
  export * from './pipeline/index.js';
8
+ export * from './ingestion/index.js';
8
9
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,mBAAmB,kBAAkB,CAAC;AAGtC,cAAc,oBAAoB,CAAC;AAGnC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,qBAAqB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,mBAAmB,kBAAkB,CAAC;AAGtC,cAAc,oBAAoB,CAAC;AAGnC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,qBAAqB,CAAC;AAGpC,cAAc,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -7,4 +7,6 @@ export * from './loaders/index.js';
7
7
  export * from './stores/index.js';
8
8
  // Pipelines
9
9
  export * from './pipeline/index.js';
10
+ // Document ingestion / conversion
11
+ export * from './ingestion/index.js';
10
12
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,UAAU;AACV,cAAc,oBAAoB,CAAC;AAEnC,SAAS;AACT,cAAc,mBAAmB,CAAC;AAElC,YAAY;AACZ,cAAc,qBAAqB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,UAAU;AACV,cAAc,oBAAoB,CAAC;AAEnC,SAAS;AACT,cAAc,mBAAmB,CAAC;AAElC,YAAY;AACZ,cAAc,qBAAqB,CAAC;AAEpC,kCAAkC;AAClC,cAAc,sBAAsB,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Document ingestion module for MarkItDown integration
3
+ * @module @dcyfr/ai-rag/ingestion
4
+ */
5
+ export { convertToMarkdown, convertBatch, checkMarkItDownInstalled } from './markitdown-bridge.js';
6
+ export type { ConversionOptions, ConversionResult, SupportedFormat, SubprocessMessage, } from './types.js';
7
+ export { ConversionError, ConversionErrorType } from './types.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/ingestion/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,iBAAiB,EAAE,YAAY,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AACnG,YAAY,EACV,iBAAiB,EACjB,gBAAgB,EAChB,eAAe,EACf,iBAAiB,GAClB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Document ingestion module for MarkItDown integration
3
+ * @module @dcyfr/ai-rag/ingestion
4
+ */
5
+ export { convertToMarkdown, convertBatch, checkMarkItDownInstalled } from './markitdown-bridge.js';
6
+ export { ConversionError, ConversionErrorType } from './types.js';
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/ingestion/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,iBAAiB,EAAE,YAAY,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAOnG,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC"}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * TypeScript bridge to Python MarkItDown document converter
3
+ * @module @dcyfr/ai-rag/ingestion/markitdown-bridge
4
+ */
5
+ import type { ConversionOptions, ConversionResult } from './types.js';
6
+ /**
7
+ * Convert document to Markdown using Python MarkItDown subprocess
8
+ *
9
+ * @param filePath - Absolute path to file to convert
10
+ * @param options - Conversion options
11
+ * @returns Conversion result with markdown and metadata
12
+ *
13
+ * @throws {ConversionError} If conversion fails
14
+ *
15
+ * @example
16
+ * ```typescript
17
+ * const result = await convertToMarkdown('/path/to/document.pdf', {
18
+ * timeout: 45000,
19
+ * enableLLMDescriptions: true
20
+ * });
21
+ * console.log(result.markdown);
22
+ * ```
23
+ */
24
+ export declare function convertToMarkdown(filePath: string, options?: ConversionOptions): Promise<ConversionResult>;
25
+ /**
26
+ * Batch convert multiple documents
27
+ *
28
+ * @param filePaths - Array of file paths to convert
29
+ * @param options - Shared conversion options
30
+ * @returns Array of conversion results (same order as input)
31
+ */
32
+ export declare function convertBatch(filePaths: string[], options?: ConversionOptions): Promise<ConversionResult[]>;
33
+ /**
34
+ * Check if Python MarkItDown is installed and accessible
35
+ */
36
+ export declare function checkMarkItDownInstalled(): Promise<boolean>;
37
+ //# sourceMappingURL=markitdown-bridge.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markitdown-bridge.d.ts","sourceRoot":"","sources":["../../src/ingestion/markitdown-bridge.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,KAAK,EACV,iBAAiB,EACjB,gBAAgB,EAGjB,MAAM,YAAY,CAAC;AAwRpB;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAoE3B;AAED;;;;;;GAMG;AACH,wBAAsB,YAAY,CAChC,SAAS,EAAE,MAAM,EAAE,EACnB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAkC7B;AAED;;GAEG;AACH,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,OAAO,CAAC,CAmBjE"}
@@ -0,0 +1,325 @@
1
+ /**
2
+ * TypeScript bridge to Python MarkItDown document converter
3
+ * @module @dcyfr/ai-rag/ingestion/markitdown-bridge
4
+ */
5
+ import { spawn } from 'node:child_process';
6
+ import { existsSync, promises as fs } from 'node:fs';
7
+ import { tmpdir } from 'node:os';
8
+ import { join, resolve, basename, extname, dirname } from 'node:path';
9
+ import { fileURLToPath } from 'node:url';
10
+ import { ConversionError, ConversionErrorType } from './types.js';
11
+ // ES module __dirname equivalent
12
+ const __filename = fileURLToPath(import.meta.url);
13
+ const __dirname = dirname(__filename);
14
+ /**
15
+ * Default conversion options
16
+ */
17
+ const DEFAULT_OPTIONS = {
18
+ timeout: 30000, // 30 seconds
19
+ maxFileSize: 52428800, // 50MB
20
+ enableLLMDescriptions: false,
21
+ preserveMetadata: true,
22
+ };
23
+ /**
24
+ * Supported file extensions mapping to format types
25
+ */
26
+ const EXTENSION_MAP = {
27
+ '.pdf': 'pdf',
28
+ '.docx': 'docx',
29
+ '.pptx': 'pptx',
30
+ '.xlsx': 'xlsx',
31
+ '.csv': 'csv',
32
+ '.html': 'html',
33
+ '.htm': 'htm',
34
+ '.xml': 'xml',
35
+ '.json': 'json',
36
+ '.png': 'png',
37
+ '.jpg': 'jpg',
38
+ '.jpeg': 'jpeg',
39
+ '.gif': 'gif',
40
+ '.webp': 'webp',
41
+ '.mp3': 'mp3',
42
+ '.wav': 'wav',
43
+ '.m4a': 'm4a',
44
+ '.epub': 'epub',
45
+ '.zip': 'zip',
46
+ };
47
+ /**
48
+ * Detect file format from extension
49
+ */
50
+ function detectFormat(filePath) {
51
+ const ext = extname(filePath).toLowerCase();
52
+ const format = EXTENSION_MAP[ext];
53
+ if (!format) {
54
+ throw new ConversionError(ConversionErrorType.UNSUPPORTED_FORMAT, `Unsupported file format: ${ext}`, { filePath, extension: ext });
55
+ }
56
+ return format;
57
+ }
58
+ /**
59
+ * Create temporary directory for conversion workspace
60
+ */
61
+ async function createTempDir() {
62
+ try {
63
+ const tempDir = join(tmpdir(), `markitdown-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`);
64
+ await fs.mkdir(tempDir, { recursive: true });
65
+ return tempDir;
66
+ }
67
+ catch (error) {
68
+ throw new ConversionError(ConversionErrorType.TEMP_DIR_ERROR, 'Failed to create temporary directory', { error: error instanceof Error ? error.message : String(error) });
69
+ }
70
+ }
71
+ /**
72
+ * Clean up temporary directory
73
+ */
74
+ async function cleanupTempDir(tempDir) {
75
+ try {
76
+ await fs.rm(tempDir, { recursive: true, force: true });
77
+ }
78
+ catch (error) {
79
+ // Log warning but don't throw - cleanup failure shouldn't break the flow
80
+ console.warn(`Failed to cleanup temp directory ${tempDir}:`, error);
81
+ }
82
+ }
83
+ /**
84
+ * Validate file accessibility and size
85
+ */
86
+ async function validateFile(filePath, maxFileSize) {
87
+ try {
88
+ const stats = await fs.stat(filePath);
89
+ if (!stats.isFile()) {
90
+ throw new ConversionError(ConversionErrorType.FILE_NOT_FOUND, 'Path is not a file', { filePath });
91
+ }
92
+ if (stats.size > maxFileSize) {
93
+ throw new ConversionError(ConversionErrorType.FILE_TOO_LARGE, `File size ${stats.size} bytes exceeds limit ${maxFileSize} bytes`, { filePath, fileSize: stats.size, maxFileSize });
94
+ }
95
+ }
96
+ catch (error) {
97
+ if (error instanceof ConversionError) {
98
+ throw error;
99
+ }
100
+ throw new ConversionError(ConversionErrorType.FILE_NOT_FOUND, `File not found or inaccessible: ${filePath}`, { error: error instanceof Error ? error.message : String(error) });
101
+ }
102
+ }
103
+ /**
104
+ * Find Python executable with worktree-aware fallback order
105
+ */
106
+ function getPythonExecutable() {
107
+ const explicitPython = process.env.PYTHON_EXECUTABLE;
108
+ if (explicitPython && existsSync(explicitPython)) {
109
+ return explicitPython;
110
+ }
111
+ const activeVenv = process.env.VIRTUAL_ENV;
112
+ if (activeVenv) {
113
+ const activeVenvPython = join(activeVenv, 'bin', 'python');
114
+ if (existsSync(activeVenvPython)) {
115
+ return activeVenvPython;
116
+ }
117
+ }
118
+ const workspaceRoot = resolve(__dirname, '../../../..');
119
+ const candidatePaths = [
120
+ join(workspaceRoot, '.venv', 'bin', 'python'),
121
+ join(workspaceRoot, '..', 'dcyfr-workspace', '.venv', 'bin', 'python'),
122
+ '/usr/bin/python3',
123
+ ];
124
+ for (const candidate of candidatePaths) {
125
+ if (existsSync(candidate)) {
126
+ return candidate;
127
+ }
128
+ }
129
+ return 'python3';
130
+ }
131
+ /**
132
+ * Execute Python subprocess with timeout handling
133
+ */
134
+ async function executeSubprocess(python, args, options) {
135
+ const child = spawn(python, args, {
136
+ cwd: options.cwd,
137
+ timeout: options.timeout,
138
+ env: options.env,
139
+ });
140
+ let stdout = '';
141
+ let stderr = '';
142
+ child.stdout?.on('data', (data) => {
143
+ stdout += data.toString();
144
+ });
145
+ child.stderr?.on('data', (data) => {
146
+ stderr += data.toString();
147
+ });
148
+ const exitCode = await new Promise((resolve, reject) => {
149
+ const timeoutId = setTimeout(() => {
150
+ child.kill('SIGTERM');
151
+ setTimeout(() => child.kill('SIGKILL'), 1000);
152
+ reject(new ConversionError(ConversionErrorType.TIMEOUT, `Conversion exceeded timeout of ${options.timeout}ms`, { filePath: options.filePath, timeout: options.timeout }));
153
+ }, options.timeout);
154
+ child.on('exit', (code) => {
155
+ clearTimeout(timeoutId);
156
+ resolve(code);
157
+ });
158
+ child.on('error', (error) => {
159
+ clearTimeout(timeoutId);
160
+ reject(new ConversionError(ConversionErrorType.SUBPROCESS_ERROR, `Python subprocess failed: ${error.message}`, { error: error.message, python }));
161
+ });
162
+ });
163
+ return { stdout, stderr, exitCode };
164
+ }
165
+ /**
166
+ * Build conversion result metadata
167
+ */
168
+ async function buildMetadata(fileName, resolvedPath, format, durationMs, stderr, opts) {
169
+ const stats = await fs.stat(resolvedPath);
170
+ const metadata = {
171
+ fileName,
172
+ fileSize: stats.size,
173
+ format,
174
+ convertedAt: new Date().toISOString(),
175
+ durationMs,
176
+ usedLLMDescriptions: opts.enableLLMDescriptions,
177
+ };
178
+ const pageRegex = /(\p{N}+)\s+pages?/iu;
179
+ const pageMatch = pageRegex.exec(stderr);
180
+ if (pageMatch) {
181
+ metadata.pageCount = Number.parseInt(pageMatch[1], 10);
182
+ }
183
+ return metadata;
184
+ }
185
+ /**
186
+ * Validate subprocess execution result
187
+ */
188
+ function validateSubprocessResult(output, resolvedPath) {
189
+ if (output.exitCode !== 0) {
190
+ throw new ConversionError(ConversionErrorType.SUBPROCESS_ERROR, `MarkItDown conversion failed with exit code ${output.exitCode}`, { exitCode: output.exitCode, stderr: output.stderr, stdout: output.stdout, filePath: resolvedPath });
191
+ }
192
+ const markdown = output.stdout.trim();
193
+ if (!markdown) {
194
+ throw new ConversionError(ConversionErrorType.SUBPROCESS_ERROR, 'MarkItDown returned empty output', { stderr: output.stderr, filePath: resolvedPath });
195
+ }
196
+ return markdown;
197
+ }
198
+ /**
199
+ * Convert document to Markdown using Python MarkItDown subprocess
200
+ *
201
+ * @param filePath - Absolute path to file to convert
202
+ * @param options - Conversion options
203
+ * @returns Conversion result with markdown and metadata
204
+ *
205
+ * @throws {ConversionError} If conversion fails
206
+ *
207
+ * @example
208
+ * ```typescript
209
+ * const result = await convertToMarkdown('/path/to/document.pdf', {
210
+ * timeout: 45000,
211
+ * enableLLMDescriptions: true
212
+ * });
213
+ * console.log(result.markdown);
214
+ * ```
215
+ */
216
+ export async function convertToMarkdown(filePath, options = {}) {
217
+ const resolvedPath = resolve(filePath);
218
+ const opts = {
219
+ ...DEFAULT_OPTIONS,
220
+ ...options,
221
+ };
222
+ await validateFile(resolvedPath, opts.maxFileSize);
223
+ const format = detectFormat(resolvedPath);
224
+ const fileName = basename(resolvedPath);
225
+ let tempDir = null;
226
+ try {
227
+ tempDir = await createTempDir();
228
+ const python = getPythonExecutable();
229
+ const startMs = Date.now();
230
+ const output = await executeSubprocess(python, ['-m', 'markitdown', resolvedPath], {
231
+ cwd: tempDir,
232
+ timeout: opts.timeout,
233
+ env: {
234
+ ...process.env,
235
+ ...(opts.enableLLMDescriptions && {
236
+ OPENAI_API_KEY: process.env.OPENAI_API_KEY,
237
+ ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
238
+ LLM_MODEL: opts.llmModel || 'gpt-4-vision-preview',
239
+ }),
240
+ },
241
+ filePath: resolvedPath,
242
+ });
243
+ const markdown = validateSubprocessResult(output, resolvedPath);
244
+ const durationMs = Date.now() - startMs;
245
+ const metadata = await buildMetadata(fileName, resolvedPath, format, durationMs, output.stderr, opts);
246
+ return {
247
+ markdown,
248
+ metadata,
249
+ success: true,
250
+ warnings: output.stderr ? [output.stderr] : undefined,
251
+ };
252
+ }
253
+ catch (error) {
254
+ if (error instanceof ConversionError) {
255
+ throw error;
256
+ }
257
+ throw new ConversionError(ConversionErrorType.SUBPROCESS_ERROR, `Unexpected error during conversion: ${error instanceof Error ? error.message : String(error)}`, { error: error instanceof Error ? error.message : String(error), filePath: resolvedPath });
258
+ }
259
+ finally {
260
+ if (tempDir) {
261
+ await cleanupTempDir(tempDir);
262
+ }
263
+ }
264
+ }
265
+ /**
266
+ * Batch convert multiple documents
267
+ *
268
+ * @param filePaths - Array of file paths to convert
269
+ * @param options - Shared conversion options
270
+ * @returns Array of conversion results (same order as input)
271
+ */
272
+ export async function convertBatch(filePaths, options = {}) {
273
+ // Process conversions in parallel with concurrency limit
274
+ const MAX_CONCURRENT = 3;
275
+ const results = [];
276
+ for (let i = 0; i < filePaths.length; i += MAX_CONCURRENT) {
277
+ const batch = filePaths.slice(i, i + MAX_CONCURRENT);
278
+ const batchResults = await Promise.allSettled(batch.map((path) => convertToMarkdown(path, options)));
279
+ for (const result of batchResults) {
280
+ if (result.status === 'fulfilled') {
281
+ results.push(result.value);
282
+ }
283
+ else {
284
+ // Convert rejected promise to failed ConversionResult
285
+ const error = result.reason;
286
+ results.push({
287
+ markdown: '',
288
+ metadata: {
289
+ fileName: '',
290
+ fileSize: 0,
291
+ format: 'pdf',
292
+ convertedAt: new Date().toISOString(),
293
+ durationMs: 0,
294
+ },
295
+ success: false,
296
+ error: error instanceof Error ? error.message : String(error),
297
+ });
298
+ }
299
+ }
300
+ }
301
+ return results;
302
+ }
303
+ /**
304
+ * Check if Python MarkItDown is installed and accessible
305
+ */
306
+ export async function checkMarkItDownInstalled() {
307
+ try {
308
+ const python = getPythonExecutable();
309
+ const child = spawn(python, ['-c', 'import markitdown; print("ok")'], {
310
+ timeout: 5000,
311
+ });
312
+ return new Promise((resolve) => {
313
+ child.on('exit', (code) => {
314
+ resolve(code === 0);
315
+ });
316
+ child.on('error', () => {
317
+ resolve(false);
318
+ });
319
+ });
320
+ }
321
+ catch {
322
+ return false;
323
+ }
324
+ }
325
+ //# sourceMappingURL=markitdown-bridge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markitdown-bridge.js","sourceRoot":"","sources":["../../src/ingestion/markitdown-bridge.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAOzC,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAElE,iCAAiC;AACjC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;AAEtC;;GAEG;AACH,MAAM,eAAe,GAA+G;IAClI,OAAO,EAAE,KAAK,EAAE,aAAa;IAC7B,WAAW,EAAE,QAAQ,EAAE,OAAO;IAC9B,qBAAqB,EAAE,KAAK;IAC5B,gBAAgB,EAAE,IAAI;CACvB,CAAC;AAEF;;GAEG;AACH,MAAM,aAAa,GAAoC;IACrD,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;IACb,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;IACb,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;IACb,MAAM,EAAE,KAAK;IACb,MAAM,EAAE,KAAK;IACb,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,KAAK;CACd,CAAC;AAEF;;GAEG;AACH,SAAS,YAAY,CAAC,QAAgB;IACpC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IAClC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,kBAAkB,EACtC,4BAA4B,GAAG,EAAE,EACjC,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,EAAE,CAC7B,CAAC;IACJ,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,aAAa;IAC1B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;QACrG,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7C,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,cAAc,EAClC,sCAAsC,EACtC,EAAE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CA
AC,KAAK,CAAC,EAAE,CAClE,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,cAAc,CAAC,OAAe;IAC3C,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,yEAAyE;QACzE,OAAO,CAAC,IAAI,CAAC,oCAAoC,OAAO,GAAG,EAAE,KAAK,CAAC,CAAC;IACtE,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CAAC,QAAgB,EAAE,WAAmB;IAC/D,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEtC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;YACpB,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,cAAc,EAClC,oBAAoB,EACpB,EAAE,QAAQ,EAAE,CACb,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,GAAG,WAAW,EAAE,CAAC;YAC7B,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,cAAc,EAClC,aAAa,KAAK,CAAC,IAAI,wBAAwB,WAAW,QAAQ,EAClE,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,CAAC,IAAI,EAAE,WAAW,EAAE,CAChD,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;YACrC,MAAM,KAAK,CAAC;QACd,CAAC;QACD,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,cAAc,EAClC,mCAAmC,QAAQ,EAAE,EAC7C,EAAE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAClE,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB;IAC1B,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;IACrD,IAAI,cAAc,IAAI,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;QACjD,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAC3C,IAAI,UAAU,EAAE,CAAC;QACf,MAAM,gBAAgB,GAAG,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC3D,IAAI,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACjC,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,OAAO,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IACxD,MAAM,cAAc,GAAG;QACrB,IAAI,CAAC,aAAa,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,CAAC;QAC7C,IAAI,CAAC,aAAa,EAAE,IAAI,EAAE,iBAAiB,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,CAAC;QACtE,kBAAkB;KACnB,CAAC;IAEF,KAAK,MAAM,SAAS,IAAI,cAAc,EAAE,CAAC;QACvC,IAAI,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC1B,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAWD;;GAEG;AACH,KAAK,UAAU,iBAAiB,CAC9B,MAAc,EACd,IAAc,EACd,OAKC;IAED,MAAM,KAAK,GAAG,KAAK,CAAC,
MAAM,EAAE,IAAI,EAAE;QAChC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,GAAG,EAAE,OAAO,CAAC,GAAG;KACjB,CAAC,CAAC;IAEH,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;QAChC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;QAChC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,IAAI,OAAO,CAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACpE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,EAAE;YAChC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACtB,UAAU,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,CAAC,CAAC;YAC9C,MAAM,CAAC,IAAI,eAAe,CACxB,mBAAmB,CAAC,OAAO,EAC3B,kCAAkC,OAAO,CAAC,OAAO,IAAI,EACrD,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CACzD,CAAC,CAAC;QACL,CAAC,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAEpB,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACxB,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;YAC1B,YAAY,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,CAAC,IAAI,eAAe,CACxB,mBAAmB,CAAC,gBAAgB,EACpC,6BAA6B,KAAK,CAAC,OAAO,EAAE,EAC5C,EAAE,KAAK,EAAE,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,CACjC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,aAAa,CAC1B,QAAgB,EAChB,YAAoB,EACpB,MAAuB,EACvB,UAAkB,EAClB,MAAc,EACd,IAAgE;IAEhE,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAE1C,MAAM,QAAQ,GAAqB;QACjC,QAAQ;QACR,QAAQ,EAAE,KAAK,CAAC,IAAI;QACpB,MAAM;QACN,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,UAAU;QACV,mBAAmB,EAAE,IAAI,CAAC,qBAAqB;KAChD,CAAC;IAEF,MAAM,SAAS,GAAG,qBAAqB,CAAC;IACxC,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACzC,IAAI,SAAS,EAAE,CAAC;QACd,QAAQ,CAAC,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAC/B,MAAwB,EACxB,YAAoB;IAEpB
,IAAI,MAAM,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,gBAAgB,EACpC,+CAA+C,MAAM,CAAC,QAAQ,EAAE,EAChE,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,CACpG,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,gBAAgB,EACpC,kCAAkC,EAClC,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,CAClD,CAAC;IACJ,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,QAAgB,EAChB,UAA6B,EAAE;IAE/B,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IACvC,MAAM,IAAI,GAAG;QACX,GAAG,eAAe;QAClB,GAAG,OAAO;KACX,CAAC;IAEF,MAAM,YAAY,CAAC,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAEnD,MAAM,MAAM,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;IAC1C,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;IAExC,IAAI,OAAO,GAAkB,IAAI,CAAC;IAClC,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,aAAa,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,mBAAmB,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE3B,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,MAAM,EACN,CAAC,IAAI,EAAE,YAAY,EAAE,YAAY,CAAC,EAClC;YACE,GAAG,EAAE,OAAO;YACZ,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,GAAG,EAAE;gBACH,GAAG,OAAO,CAAC,GAAG;gBACd,GAAG,CAAC,IAAI,CAAC,qBAAqB,IAAI;oBAChC,cAAc,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;oBAC1C,iBAAiB,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;oBAChD,SAAS,EAAE,IAAI,CAAC,QAAQ,IAAI,sBAAsB;iBACnD,CAAC;aACH;YACD,QAAQ,EAAE,YAAY;SACvB,CACF,CAAC;QAEF,MAAM,QAAQ,GAAG,wBAAwB,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QAChE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;QACxC,MAAM,QAAQ,GAAG,MAAM,aAAa,CAClC,QAAQ,EACR,YAAY,EACZ,MAAM,EACN,UAAU,EACV,MAAM,CAAC,MAAM,EACb,IAAI,CACL,CAAC;QAEF,OAAO;YACL,QAAQ;YACR,QAAQ;YACR,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS;SACtD,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,eAAe,EAAE,CAAC;YACrC,MAAM,KAAK,CAAC;QACd,CAAC;QAED,MAAM,IAAI,eAAe,CACvB,mBAAmB,CAAC,gBAAgB,EACpC,uCAAuC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,K
AAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,EAC/F,EAAE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,CAC1F,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,cAAc,CAAC,OAAO,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,SAAmB,EACnB,UAA6B,EAAE;IAE/B,yDAAyD;IACzD,MAAM,cAAc,GAAG,CAAC,CAAC;IACzB,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,cAAc,EAAE,CAAC;QAC1D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,UAAU,CAC3C,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CACtD,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;YAClC,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,sDAAsD;gBACtD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;gBAC5B,OAAO,CAAC,IAAI,CAAC;oBACX,QAAQ,EAAE,EAAE;oBACZ,QAAQ,EAAE;wBACR,QAAQ,EAAE,EAAE;wBACZ,QAAQ,EAAE,CAAC;wBACX,MAAM,EAAE,KAAK;wBACb,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACrC,UAAU,EAAE,CAAC;qBACd;oBACD,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,mBAAmB,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,gCAAgC,CAAC,EAAE;YACpE,OAAO,EAAE,IAAI;SACd,CAAC,CAAC;QAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBACxB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;gBACrB,OAAO,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
@@ -0,0 +1,158 @@
1
+ /**
2
+ * MarkItDown document conversion types
3
+ * @module @dcyfr/ai-rag/ingestion
4
+ */
5
+ /**
6
+ * Supported file formats for conversion
7
+ */
8
+ export type SupportedFormat = 'pdf' | 'docx' | 'pptx' | 'xlsx' | 'csv' | 'html' | 'htm' | 'xml' | 'json' | 'png' | 'jpg' | 'jpeg' | 'gif' | 'webp' | 'mp3' | 'wav' | 'm4a' | 'epub' | 'zip';
9
+ /**
10
+ * Options for document conversion
11
+ */
12
+ export interface ConversionOptions {
13
+ /**
14
+ * Maximum time to wait for conversion (milliseconds)
15
+ * @default 30000 (30 seconds)
16
+ */
17
+ timeout?: number;
18
+ /**
19
+ * Maximum file size to process (bytes)
20
+ * @default 52428800 (50MB)
21
+ */
22
+ maxFileSize?: number;
23
+ /**
24
+ * Enable LLM-powered image descriptions (requires API key)
25
+ * Supports: OpenAI GPT-4 Vision, Anthropic Claude
26
+ * @default false
27
+ */
28
+ enableLLMDescriptions?: boolean;
29
+ /**
30
+ * LLM model to use for image descriptions
31
+ * @default "gpt-4-vision-preview" or "claude-3-opus-20240229"
32
+ */
33
+ llmModel?: string;
34
+ /**
35
+ * Preserve original file metadata in result
36
+ * @default true
37
+ */
38
+ preserveMetadata?: boolean;
39
+ /**
40
+ * Working directory for temporary files
41
+ * Auto-created and cleaned up after conversion
42
+ * @default system temp directory
43
+ */
44
+ workDir?: string;
45
+ }
46
+ /**
47
+ * Metadata about the converted document
48
+ */
49
+ export interface DocumentMetadata {
50
+ /**
51
+ * Original file name
52
+ */
53
+ fileName: string;
54
+ /**
55
+ * File size in bytes
56
+ */
57
+ fileSize: number;
58
+ /**
59
+ * Detected or specified file format
60
+ */
61
+ format: SupportedFormat;
62
+ /**
63
+ * Conversion timestamp (ISO 8601)
64
+ */
65
+ convertedAt: string;
66
+ /**
67
+ * Conversion duration in milliseconds
68
+ */
69
+ durationMs: number;
70
+ /**
71
+ * Number of pages (PDF, DOCX, PPTX) or sections
72
+ */
73
+ pageCount?: number;
74
+ /**
75
+ * Whether LLM descriptions were used
76
+ */
77
+ usedLLMDescriptions?: boolean;
78
+ /**
79
+ * Additional format-specific metadata
80
+ */
81
+ [key: string]: unknown;
82
+ }
83
+ /**
84
+ * Result of document conversion
85
+ */
86
+ export interface ConversionResult {
87
+ /**
88
+ * Converted markdown content
89
+ */
90
+ markdown: string;
91
+ /**
92
+ * Document metadata
93
+ */
94
+ metadata: DocumentMetadata;
95
+ /**
96
+ * Conversion success status
97
+ */
98
+ success: boolean;
99
+ /**
100
+ * Error message if conversion failed
101
+ */
102
+ error?: string;
103
+ /**
104
+ * Warning messages (non-fatal issues)
105
+ */
106
+ warnings?: string[];
107
+ }
108
+ /**
109
+ * Error types for conversion failures
110
+ */
111
+ export declare enum ConversionErrorType {
112
+ /** File not found or inaccessible */
113
+ FILE_NOT_FOUND = "FILE_NOT_FOUND",
114
+ /** File exceeds maximum size limit */
115
+ FILE_TOO_LARGE = "FILE_TOO_LARGE",
116
+ /** File format not supported by MarkItDown */
117
+ UNSUPPORTED_FORMAT = "UNSUPPORTED_FORMAT",
118
+ /** Conversion timeout exceeded */
119
+ TIMEOUT = "TIMEOUT",
120
+ /** Python subprocess failed to start or crashed */
121
+ SUBPROCESS_ERROR = "SUBPROCESS_ERROR",
122
+ /** Temporary directory creation/cleanup failed */
123
+ TEMP_DIR_ERROR = "TEMP_DIR_ERROR",
124
+ /** Invalid conversion options provided */
125
+ INVALID_OPTIONS = "INVALID_OPTIONS",
126
+ /** Python environment or MarkItDown not installed */
127
+ PYTHON_ENV_ERROR = "PYTHON_ENV_ERROR",
128
+ /** LLM API call failed (if enableLLMDescriptions=true) */
129
+ LLM_API_ERROR = "LLM_API_ERROR"
130
+ }
131
+ /**
132
+ * Conversion error with typed error code
133
+ */
134
+ export declare class ConversionError extends Error {
135
+ readonly type: ConversionErrorType;
136
+ readonly details?: Record<string, unknown> | undefined;
137
+ constructor(type: ConversionErrorType, message: string, details?: Record<string, unknown> | undefined);
138
+ }
139
+ /**
140
+ * Internal subprocess communication message format
141
+ */
142
+ export interface SubprocessMessage {
143
+ /** Message type: request or response */
144
+ type: 'request' | 'response';
145
+ /** File path to convert */
146
+ filePath?: string;
147
+ /** Conversion options */
148
+ options?: ConversionOptions;
149
+ /** Conversion result */
150
+ result?: ConversionResult;
151
+ /** Error information */
152
+ error?: {
153
+ type: string;
154
+ message: string;
155
+ details?: Record<string, unknown>;
156
+ };
157
+ }
158
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/ingestion/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,MAAM,eAAe,GACvB,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,KAAK,GACxC,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,MAAM,GAC/B,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GACvC,KAAK,GAAG,KAAK,GAAG,KAAK,GACrB,MAAM,GAAG,KAAK,CAAC;AAEnB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAEhC;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAE3B;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,MAAM,EAAE,eAAe,CAAC;IAExB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAE9B;;OAEG;IACH,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,QAAQ,EAAE,gBAAgB,CAAC;IAE3B;;OAEG;IACH,OAAO,EAAE,OAAO,CAAC;IAEjB;;OAEG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,oBAAY,mBAAmB;IAC7B,qCAAqC;IACrC,cAAc,mBAAmB;IAEjC,sCAAsC;IACtC,cAAc,mBAAmB;IAEjC,8CAA8C;IAC9C,kBAAkB,uBAAuB;IAEzC,kCAAkC;IAClC,OAAO,YAAY;IAEnB,mDAAmD;IACnD,gBAAgB,qBAAqB;IAErC,kDAAkD;IAClD,cAAc,mBAAmB;IAEjC,0CAA0C;IAC1C,eAAe,oBAAoB;IAEnC,qDAAqD;IACrD,gBAAgB,qBAAqB;IAErC,0DAA0D;IAC1D,aAAa,kBAAkB;CAChC;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,KAAK;aAEtB,IAAI,EAAE,mBAAmB;aAEzB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;gBAFjC,IAAI,EAAE,mBAAmB,EACzC,OAAO,EAAE,MAAM,EACC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,YAAA;CAKpD;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,wCAAwC;IACxC,IAAI,EAAE,SAAS,GAAG,UAAU,CAAC;IAE7B,2BAA2B;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,yBAAyB;IACzB,OAAO,CAAC,EAAE,iBAAiB,CAAC;IAE5B,wBAAwB;IACxB,MAAM,CA
AC,EAAE,gBAAgB,CAAC;IAE1B,wBAAwB;IACxB,KAAK,CAAC,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACnC,CAAC;CACH"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * MarkItDown document conversion types
3
+ * @module @dcyfr/ai-rag/ingestion
4
+ */
5
+ /**
6
+ * Error types for conversion failures
7
+ */
8
+ export var ConversionErrorType;
9
+ (function (ConversionErrorType) {
10
+ /** File not found or inaccessible */
11
+ ConversionErrorType["FILE_NOT_FOUND"] = "FILE_NOT_FOUND";
12
+ /** File exceeds maximum size limit */
13
+ ConversionErrorType["FILE_TOO_LARGE"] = "FILE_TOO_LARGE";
14
+ /** File format not supported by MarkItDown */
15
+ ConversionErrorType["UNSUPPORTED_FORMAT"] = "UNSUPPORTED_FORMAT";
16
+ /** Conversion timeout exceeded */
17
+ ConversionErrorType["TIMEOUT"] = "TIMEOUT";
18
+ /** Python subprocess failed to start or crashed */
19
+ ConversionErrorType["SUBPROCESS_ERROR"] = "SUBPROCESS_ERROR";
20
+ /** Temporary directory creation/cleanup failed */
21
+ ConversionErrorType["TEMP_DIR_ERROR"] = "TEMP_DIR_ERROR";
22
+ /** Invalid conversion options provided */
23
+ ConversionErrorType["INVALID_OPTIONS"] = "INVALID_OPTIONS";
24
+ /** Python environment or MarkItDown not installed */
25
+ ConversionErrorType["PYTHON_ENV_ERROR"] = "PYTHON_ENV_ERROR";
26
+ /** LLM API call failed (if enableLLMDescriptions=true) */
27
+ ConversionErrorType["LLM_API_ERROR"] = "LLM_API_ERROR";
28
+ })(ConversionErrorType || (ConversionErrorType = {}));
29
+ /**
30
+ * Conversion error with typed error code
31
+ */
32
+ export class ConversionError extends Error {
33
+ type;
34
+ details;
35
+ constructor(type, message, details) {
36
+ super(message);
37
+ this.type = type;
38
+ this.details = details;
39
+ this.name = 'ConversionError';
40
+ }
41
+ }
42
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/ingestion/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAkIH;;GAEG;AACH,MAAM,CAAN,IAAY,mBA2BX;AA3BD,WAAY,mBAAmB;IAC7B,qCAAqC;IACrC,wDAAiC,CAAA;IAEjC,sCAAsC;IACtC,wDAAiC,CAAA;IAEjC,8CAA8C;IAC9C,gEAAyC,CAAA;IAEzC,kCAAkC;IAClC,0CAAmB,CAAA;IAEnB,mDAAmD;IACnD,4DAAqC,CAAA;IAErC,kDAAkD;IAClD,wDAAiC,CAAA;IAEjC,0CAA0C;IAC1C,0DAAmC,CAAA;IAEnC,qDAAqD;IACrD,4DAAqC,CAAA;IAErC,0DAA0D;IAC1D,sDAA+B,CAAA;AACjC,CAAC,EA3BW,mBAAmB,KAAnB,mBAAmB,QA2B9B;AAED;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,KAAK;IAEtB;IAEA;IAHlB,YACkB,IAAyB,EACzC,OAAe,EACC,OAAiC;QAEjD,KAAK,CAAC,OAAO,CAAC,CAAC;QAJC,SAAI,GAAJ,IAAI,CAAqB;QAEzB,YAAO,GAAP,OAAO,CAA0B;QAGjD,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF"}
@@ -7,21 +7,33 @@ import type { DocumentLoader, EmbeddingGenerator, VectorStore, IngestionOptions,
7
7
  * Pipeline for ingesting documents into vector store
8
8
  */
9
9
  export declare class IngestionPipeline {
10
- private loader;
11
- private embedder;
12
- private store;
10
+ private readonly loader;
11
+ private readonly embedder;
12
+ private readonly store;
13
13
  constructor(loader: DocumentLoader, embedder: EmbeddingGenerator, store: VectorStore);
14
14
  /**
15
15
  * Ingest one or more documents
16
16
  */
17
17
  ingest(filePaths: string | string[], options?: IngestionOptions): Promise<IngestionResult>;
18
+ private loadDocumentsForPath;
19
+ private processAndStoreDocuments;
20
+ private extractErrorType;
21
+ private updateMemoryMetrics;
18
22
  /**
19
23
  * Process documents: chunk and embed
20
24
  */
21
25
  private processDocuments;
22
26
  /**
23
- * Simple chunking (override with loader-specific chunking if available)
27
+ * Fixed-size chunking with overlap
24
28
  */
25
- private chunkDocument;
29
+ private fixedChunkDocument;
30
+ /**
31
+ * Semantic chunking preserving heading-based sections where possible
32
+ */
33
+ private semanticChunkDocument;
34
+ /**
35
+ * Infer document type from file extension
36
+ */
37
+ private inferDocumentType;
26
38
  }
27
39
  //# sourceMappingURL=pipeline.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../../src/pipeline/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAGV,cAAc,EACd,kBAAkB,EAClB,WAAW,EACX,gBAAgB,EAChB,eAAe,EAChB,MAAM,sBAAsB,CAAC;AAE9B;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,KAAK,CAAc;gBAGzB,MAAM,EAAE,cAAc,EACtB,QAAQ,EAAE,kBAAkB,EAC5B,KAAK,EAAE,WAAW;IAOpB;;OAEG;IACG,MAAM,CACV,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,EAC5B,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,eAAe,CAAC;IA6D3B;;OAEG;YACW,gBAAgB;IAqB9B;;OAEG;IACH,OAAO,CAAC,aAAa;CAgCtB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../../src/pipeline/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAGV,cAAc,EACd,kBAAkB,EAClB,WAAW,EACX,gBAAgB,EAChB,eAAe,EAChB,MAAM,sBAAsB,CAAC;AAG9B;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiB;IACxC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAqB;IAC9C,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAc;gBAGlC,MAAM,EAAE,cAAc,EACtB,QAAQ,EAAE,kBAAkB,EAC5B,KAAK,EAAE,WAAW;IAOpB;;OAEG;IACG,MAAM,CACV,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,EAC5B,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,eAAe,CAAC;YA+Fb,oBAAoB;YA0CpB,wBAAwB;IAsCtC,OAAO,CAAC,gBAAgB;IAQxB,OAAO,CAAC,mBAAmB;IAW3B;;OAEG;YACW,gBAAgB;IAmC9B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA+B1B;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAsC7B;;OAEG;IACH,OAAO,CAAC,iBAAiB;CAS1B"}
@@ -2,6 +2,7 @@
2
2
  * Document ingestion pipeline
3
3
  * Orchestrates loading, embedding, and storage
4
4
  */
5
+ import { convertToMarkdown } from '../../ingestion/index.js';
5
6
  /**
6
7
  * Pipeline for ingesting documents into vector store
7
8
  */
@@ -20,58 +21,150 @@ export class IngestionPipeline {
20
21
  async ingest(filePaths, options) {
21
22
  const paths = Array.isArray(filePaths) ? filePaths : [filePaths];
22
23
  const startTime = Date.now();
23
- const { batchSize = 32, onProgress, loaderConfig, } = options ?? {};
24
+ const { batchSize = 32, onProgress, loaderConfig, enableDocumentConversion = false, conversionTimeoutMs = 30000, conversionMaxFileSize = 50 * 1024 * 1024, enableLLMDescriptions = false, chunkingStrategy = 'fixed', fixedChunkSize, fixedChunkOverlap, onConversionError, } = options ?? {};
24
25
  let totalDocuments = 0;
25
26
  let totalChunks = 0;
27
+ let successCount = 0;
28
+ let failureCount = 0;
26
29
  const errors = [];
30
+ const conversionDurationsMs = [];
31
+ let peakHeapUsedBytes = process.memoryUsage().heapUsed;
32
+ const warnings = [];
27
33
  for (let i = 0; i < paths.length; i++) {
28
34
  const path = paths[i];
29
35
  try {
30
- // Load documents
31
- const documents = await this.loader.load(path, loaderConfig);
36
+ const documents = await this.loadDocumentsForPath(path, {
37
+ enableDocumentConversion,
38
+ conversionTimeoutMs,
39
+ conversionMaxFileSize,
40
+ enableLLMDescriptions,
41
+ loaderConfig,
42
+ conversionDurationsMs,
43
+ });
32
44
  totalDocuments += documents.length;
33
- // Chunk and embed in batches
34
- const allChunks = [];
35
- for (let j = 0; j < documents.length; j += batchSize) {
36
- const batch = documents.slice(j, j + batchSize);
37
- const chunks = await this.processDocuments(batch);
38
- allChunks.push(...chunks);
39
- if (onProgress) {
40
- onProgress(i + 1, paths.length, {
41
- currentFile: path,
42
- documentsProcessed: Math.min(j + batchSize, documents.length),
43
- totalDocuments: documents.length,
44
- chunksGenerated: allChunks.length,
45
- });
46
- }
47
- }
48
- // Store chunks
49
- await this.store.addDocuments(allChunks);
50
- totalChunks += allChunks.length;
45
+ const chunkCount = await this.processAndStoreDocuments(path, documents, {
46
+ batchSize,
47
+ chunkingStrategy,
48
+ fixedChunkSize,
49
+ fixedChunkOverlap,
50
+ onProgress,
51
+ fileIndex: i,
52
+ totalFiles: paths.length,
53
+ });
54
+ totalChunks += chunkCount;
55
+ successCount++;
51
56
  }
52
57
  catch (error) {
53
- errors.push({
54
- file: path,
55
- error: error instanceof Error ? error.message : String(error),
56
- });
58
+ failureCount++;
59
+ const errorMessage = error instanceof Error ? error.message : String(error);
60
+ errors.push({ file: path, error: errorMessage });
61
+ if (enableDocumentConversion && onConversionError) {
62
+ onConversionError({
63
+ file: path,
64
+ error: errorMessage,
65
+ errorType: this.extractErrorType(error),
66
+ });
67
+ }
57
68
  }
69
+ peakHeapUsedBytes = this.updateMemoryMetrics(path, peakHeapUsedBytes, warnings);
58
70
  }
59
71
  const endTime = Date.now();
72
+ const durationMs = endTime - startTime;
73
+ const averageConversionMs = conversionDurationsMs.length > 0
74
+ ? conversionDurationsMs.reduce((sum, ms) => sum + ms, 0) / conversionDurationsMs.length
75
+ : undefined;
76
+ const documentsPerSecond = durationMs > 0 ? (successCount / durationMs) * 1000 : 0;
60
77
  return {
61
78
  documentsProcessed: totalDocuments,
79
+ successCount,
80
+ failureCount,
62
81
  chunksGenerated: totalChunks,
63
82
  errors,
64
- durationMs: endTime - startTime,
83
+ metrics: {
84
+ averageConversionMs,
85
+ documentsPerSecond,
86
+ peakHeapUsedMb: peakHeapUsedBytes / (1024 * 1024),
87
+ warnings,
88
+ },
89
+ durationMs,
65
90
  };
66
91
  }
92
+ async loadDocumentsForPath(path, config) {
93
+ if (!config.enableDocumentConversion) {
94
+ return this.loader.load(path, config.loaderConfig);
95
+ }
96
+ const conversionStart = Date.now();
97
+ const converted = await convertToMarkdown(path, {
98
+ timeout: config.conversionTimeoutMs,
99
+ maxFileSize: config.conversionMaxFileSize,
100
+ enableLLMDescriptions: config.enableLLMDescriptions,
101
+ preserveMetadata: true,
102
+ });
103
+ config.conversionDurationsMs.push(Date.now() - conversionStart);
104
+ return [
105
+ {
106
+ id: `${path}-${Date.now()}`,
107
+ content: converted.markdown,
108
+ metadata: {
109
+ source: path,
110
+ type: this.inferDocumentType(path),
111
+ createdAt: new Date(),
112
+ conversionTimestamp: converted.metadata.convertedAt,
113
+ originalFileType: converted.metadata.format,
114
+ pageCount: converted.metadata.pageCount,
115
+ conversionDurationMs: converted.metadata.durationMs,
116
+ usedLLMDescriptions: converted.metadata.usedLLMDescriptions,
117
+ },
118
+ },
119
+ ];
120
+ }
121
+ async processAndStoreDocuments(path, documents, config) {
122
+ const allChunks = [];
123
+ for (let j = 0; j < documents.length; j += config.batchSize) {
124
+ const batch = documents.slice(j, j + config.batchSize);
125
+ const chunks = await this.processDocuments(batch, {
126
+ chunkingStrategy: config.chunkingStrategy,
127
+ fixedChunkSize: config.fixedChunkSize,
128
+ fixedChunkOverlap: config.fixedChunkOverlap,
129
+ });
130
+ allChunks.push(...chunks);
131
+ if (config.onProgress) {
132
+ config.onProgress(config.fileIndex + 1, config.totalFiles, {
133
+ currentFile: path,
134
+ documentsProcessed: Math.min(j + config.batchSize, documents.length),
135
+ totalDocuments: documents.length,
136
+ chunksGenerated: allChunks.length,
137
+ });
138
+ }
139
+ }
140
+ await this.store.addDocuments(allChunks);
141
+ return allChunks.length;
142
+ }
143
+ extractErrorType(error) {
144
+ if (typeof error !== 'object' || error === null || !('type' in error)) {
145
+ return undefined;
146
+ }
147
+ const rawType = error.type;
148
+ return typeof rawType === 'string' ? rawType : undefined;
149
+ }
150
+ updateMemoryMetrics(path, currentPeak, warnings) {
151
+ const currentHeap = process.memoryUsage().heapUsed;
152
+ const nextPeak = Math.max(currentHeap, currentPeak);
153
+ if (currentHeap > 512 * 1024 * 1024) {
154
+ warnings.push(`High memory usage detected while processing ${path}: ${(currentHeap / (1024 * 1024)).toFixed(1)}MB`);
155
+ }
156
+ return nextPeak;
157
+ }
67
158
  /**
68
159
  * Process documents: chunk and embed
69
160
  */
70
- async processDocuments(documents) {
161
+ async processDocuments(documents, options) {
71
162
  const chunks = [];
72
163
  for (const doc of documents) {
73
- // Split into chunks (using simple text splitting)
74
- const docChunks = this.chunkDocument(doc);
164
+ // Split into chunks
165
+ const docChunks = options?.chunkingStrategy === 'semantic'
166
+ ? this.semanticChunkDocument(doc)
167
+ : this.fixedChunkDocument(doc, options?.fixedChunkSize ?? 1000, options?.fixedChunkOverlap ?? 200);
75
168
  chunks.push(...docChunks);
76
169
  }
77
170
  // Generate embeddings
@@ -84,11 +177,9 @@ export class IngestionPipeline {
84
177
  return chunks;
85
178
  }
86
179
  /**
87
- * Simple chunking (override with loader-specific chunking if available)
180
+ * Fixed-size chunking with overlap
88
181
  */
89
- chunkDocument(doc) {
90
- const chunkSize = 1000;
91
- const overlap = 200;
182
+ fixedChunkDocument(doc, chunkSize, overlap) {
92
183
  const chunks = [];
93
184
  const content = doc.content;
94
185
  for (let i = 0; i < content.length; i += chunkSize - overlap) {
@@ -114,5 +205,57 @@ export class IngestionPipeline {
114
205
  }
115
206
  return chunks;
116
207
  }
208
+ /**
209
+ * Semantic chunking preserving heading-based sections where possible
210
+ */
211
+ semanticChunkDocument(doc) {
212
+ const chunks = [];
213
+ const sections = doc.content
214
+ .split(/\n(?=#{1,6}\s)/g)
215
+ .filter((section) => section.trim().length > 0);
216
+ if (sections.length <= 1) {
217
+ return this.fixedChunkDocument(doc, 1000, 200);
218
+ }
219
+ let cursor = 0;
220
+ for (let i = 0; i < sections.length; i++) {
221
+ const section = sections[i];
222
+ const startChar = doc.content.indexOf(section, cursor);
223
+ const endChar = startChar + section.length;
224
+ const chunkId = `${doc.id}-chunk-${i}`;
225
+ chunks.push({
226
+ id: chunkId,
227
+ documentId: doc.id,
228
+ content: section,
229
+ index: i,
230
+ metadata: {
231
+ chunkIndex: i,
232
+ chunkCount: sections.length,
233
+ startChar,
234
+ endChar,
235
+ chunkingStrategy: 'semantic',
236
+ ...doc.metadata,
237
+ },
238
+ });
239
+ cursor = endChar;
240
+ }
241
+ return chunks;
242
+ }
243
+ /**
244
+ * Infer document type from file extension
245
+ */
246
+ inferDocumentType(path) {
247
+ const lowerPath = path.toLowerCase();
248
+ if (lowerPath.endsWith('.pdf'))
249
+ return 'pdf';
250
+ if (lowerPath.endsWith('.md') || lowerPath.endsWith('.markdown'))
251
+ return 'markdown';
252
+ if (lowerPath.endsWith('.html') || lowerPath.endsWith('.htm'))
253
+ return 'html';
254
+ if (lowerPath.endsWith('.json'))
255
+ return 'json';
256
+ if (lowerPath.endsWith('.txt'))
257
+ return 'text';
258
+ return 'other';
259
+ }
117
260
  }
118
261
  //# sourceMappingURL=pipeline.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/pipeline/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAYH;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAiB;IACvB,QAAQ,CAAqB;IAC7B,KAAK,CAAc;IAE3B,YACE,MAAsB,EACtB,QAA4B,EAC5B,KAAkB;QAElB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,SAA4B,EAC5B,OAA0B;QAE1B,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,MAAM,EACJ,SAAS,GAAG,EAAE,EACd,UAAU,EACV,YAAY,GACb,GAAG,OAAO,IAAI,EAAE,CAAC;QAElB,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,MAAM,GAA2C,EAAE,CAAC;QAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAEtB,IAAI,CAAC;gBACH,iBAAiB;gBACjB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;gBAC7D,cAAc,IAAI,SAAS,CAAC,MAAM,CAAC;gBAEnC,6BAA6B;gBAC7B,MAAM,SAAS,GAAoB,EAAE,CAAC;gBAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;oBACrD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;oBAChD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;oBAClD,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;oBAE1B,IAAI,UAAU,EAAE,CAAC;wBACf,UAAU,CAAC,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE;4BAC9B,WAAW,EAAE,IAAI;4BACjB,kBAAkB,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC;4BAC7D,cAAc,EAAE,SAAS,CAAC,MAAM;4BAChC,eAAe,EAAE,SAAS,CAAC,MAAM;yBAClC,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;gBAED,eAAe;gBACf,MAAM,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;gBACzC,WAAW,IAAI,SAAS,CAAC,MAAM,CAAC;YAClC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,IAAI;oBACV,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GA
AG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE3B,OAAO;YACL,kBAAkB,EAAE,cAAc;YAClC,eAAe,EAAE,WAAW;YAC5B,MAAM;YACN,UAAU,EAAE,OAAO,GAAG,SAAS;SAChC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,gBAAgB,CAAC,SAAqB;QAClD,MAAM,MAAM,GAAoB,EAAE,CAAC;QAEnC,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,kDAAkD;YAClD,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5B,CAAC;QAED,sBAAsB;QACtB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnD,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEpD,oBAAoB;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,GAAa;QACjC,MAAM,SAAS,GAAG,IAAI,CAAC;QACvB,MAAM,OAAO,GAAG,GAAG,CAAC;QACpB,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,OAAO,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,EAAE,UAAU,MAAM,CAAC,MAAM,EAAE,CAAC;YAEnD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,OAAO;gBACX,UAAU,EAAE,GAAG,CAAC,EAAE;gBAClB,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,MAAM,CAAC,MAAM;gBACpB,QAAQ,EAAE;oBACR,UAAU,EAAE,MAAM,CAAC,MAAM;oBACzB,UAAU,EAAE,CAAC,EAAE,oBAAoB;oBACnC,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC;oBAChD,GAAG,GAAG,CAAC,QAAQ;iBAChB;aACF,CAAC,CAAC;QACL,CAAC;QAED,sBAAsB;QACtB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,CAAC,QAAQ,CAAC,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5C,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/pipeline/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAWH,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAE7D;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACX,MAAM,CAAiB;IACvB,QAAQ,CAAqB;IAC7B,KAAK,CAAc;IAEpC,YACE,MAAsB,EACtB,QAA4B,EAC5B,KAAkB;QAElB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,SAA4B,EAC5B,OAA0B;QAE1B,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,MAAM,EACJ,SAAS,GAAG,EAAE,EACd,UAAU,EACV,YAAY,EACZ,wBAAwB,GAAG,KAAK,EAChC,mBAAmB,GAAG,KAAK,EAC3B,qBAAqB,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EACxC,qBAAqB,GAAG,KAAK,EAC7B,gBAAgB,GAAG,OAAO,EAC1B,cAAc,EACd,iBAAiB,EACjB,iBAAiB,GAClB,GAAG,OAAO,IAAI,EAAE,CAAC;QAElB,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,MAAM,MAAM,GAA2C,EAAE,CAAC;QAC1D,MAAM,qBAAqB,GAAa,EAAE,CAAC;QAC3C,IAAI,iBAAiB,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC;QACvD,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAEtB,IAAI,CAAC;gBACH,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,IAAI,EAAE;oBACtD,wBAAwB;oBACxB,mBAAmB;oBACnB,qBAAqB;oBACrB,qBAAqB;oBACrB,YAAY;oBACZ,qBAAqB;iBACtB,CAAC,CAAC;gBAEH,cAAc,IAAI,SAAS,CAAC,MAAM,CAAC;gBAEnC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,IAAI,EAAE,SAAS,EAAE;oBACtE,SAAS;oBACT,gBAAgB;oBAChB,cAAc;oBACd,iBAAiB;oBACjB,UAAU;oBACV,SAAS,EAAE,CAAC;oBACZ,UAAU,EAAE,KAAK,CAAC,MAAM;iBACzB,CAAC,CAAC;gBAEH,WAAW,IAAI,UAAU,CAAC;gBAC1B,YAAY,EAAE,CAAC;YACjB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,YAAY,EAAE,CAAC;gBACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC5E,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC
;gBAEjD,IAAI,wBAAwB,IAAI,iBAAiB,EAAE,CAAC;oBAClD,iBAAiB,CAAC;wBAChB,IAAI,EAAE,IAAI;wBACV,KAAK,EAAE,YAAY;wBACnB,SAAS,EAAE,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC;qBACxC,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,iBAAiB,GAAG,IAAI,CAAC,mBAAmB,CAAC,IAAI,EAAE,iBAAiB,EAAE,QAAQ,CAAC,CAAC;QAClF,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,OAAO,GAAG,SAAS,CAAC;QACvC,MAAM,mBAAmB,GACvB,qBAAqB,CAAC,MAAM,GAAG,CAAC;YAC9B,CAAC,CAAC,qBAAqB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,GAAG,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,qBAAqB,CAAC,MAAM;YACvF,CAAC,CAAC,SAAS,CAAC;QAChB,MAAM,kBAAkB,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAEnF,OAAO;YACL,kBAAkB,EAAE,cAAc;YAClC,YAAY;YACZ,YAAY;YACZ,eAAe,EAAE,WAAW;YAC5B,MAAM;YACN,OAAO,EAAE;gBACP,mBAAmB;gBACnB,kBAAkB;gBAClB,cAAc,EAAE,iBAAiB,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;gBACjD,QAAQ;aACT;YACD,UAAU;SACX,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAChC,IAAY,EACZ,MAOC;QAED,IAAI,CAAC,MAAM,CAAC,wBAAwB,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,YAAY,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACnC,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE;YAC9C,OAAO,EAAE,MAAM,CAAC,mBAAmB;YACnC,WAAW,EAAE,MAAM,CAAC,qBAAqB;YACzC,qBAAqB,EAAE,MAAM,CAAC,qBAAqB;YACnD,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QACH,MAAM,CAAC,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC,CAAC;QAEhE,OAAO;YACL;gBACE,EAAE,EAAE,GAAG,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC3B,OAAO,EAAE,SAAS,CAAC,QAAQ;gBAC3B,QAAQ,EAAE;oBACR,MAAM,EAAE,IAAI;oBACZ,IAAI,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC;oBAClC,SAAS,EAAE,IAAI,IAAI,EAAE;oBACrB,mBAAmB,EAAE,SAAS,CAAC,QAAQ,CAAC,WAAW;oBACnD,gBAAgB,EAAE,SAAS,CAAC,QAAQ,CAAC,MAAM;oBAC3C,SAAS,EAAE,SAAS,CAAC,QAAQ,CAAC,SAAS;oBACvC,oBAAoB,EAAE,SAAS,CAAC,QAAQ,CAAC,UAAU;oBACnD,mBAAmB,EAAE,SAAS,CAAC,QAAQ,CAAC,mBAAmB;iBAC5D;aACF;SACF,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,wBAAwB,CACpC,IAAY,EACZ,SAAqB,EACrB,MAQC;QAED,MAAM,SAAS,GAAoB,EAAE,CAAC;QAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,
CAAC,MAAM,EAAE,CAAC,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YAC5D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC;YACvD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE;gBAChD,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;gBACzC,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;aAC5C,CAAC,CAAC;YACH,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;YAE1B,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;gBACtB,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,EAAE,MAAM,CAAC,UAAU,EAAE;oBACzD,WAAW,EAAE,IAAI;oBACjB,kBAAkB,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC;oBACpE,cAAc,EAAE,SAAS,CAAC,MAAM;oBAChC,eAAe,EAAE,SAAS,CAAC,MAAM;iBAClC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,MAAM,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACzC,OAAO,SAAS,CAAC,MAAM,CAAC;IAC1B,CAAC;IAEO,gBAAgB,CAAC,KAAc;QACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC,EAAE,CAAC;YACtE,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,MAAM,OAAO,GAAI,KAA4B,CAAC,IAAI,CAAC;QACnD,OAAO,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IAC3D,CAAC;IAEO,mBAAmB,CAAC,IAAY,EAAE,WAAmB,EAAE,QAAkB;QAC/E,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC;QACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;QACpD,IAAI,WAAW,GAAG,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YACpC,QAAQ,CAAC,IAAI,CACX,+CAA+C,IAAI,KAAK,CAAC,WAAW,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CACrG,CAAC;QACJ,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,gBAAgB,CAC5B,SAAqB,EACrB,OAIC;QAED,MAAM,MAAM,GAAoB,EAAE,CAAC;QAEnC,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,oBAAoB;YACpB,MAAM,SAAS,GACb,OAAO,EAAE,gBAAgB,KAAK,UAAU;gBACtC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,GAAG,CAAC;gBACjC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CACrB,GAAG,EACH,OAAO,EAAE,cAAc,IAAI,IAAI,EAC/B,OAAO,EAAE,iBAAiB,IAAI,GAAG,CAClC,CAAC;YACR,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5B,CAAC;QAED,sBAAsB;QACtB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnD,MAAM,UAAU,GAAG,MAAM,IAAI,CA
AC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEpD,oBAAoB;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,kBAAkB,CAAC,GAAa,EAAE,SAAiB,EAAE,OAAe;QAC1E,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,OAAO,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,EAAE,UAAU,MAAM,CAAC,MAAM,EAAE,CAAC;YAEnD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,OAAO;gBACX,UAAU,EAAE,GAAG,CAAC,EAAE;gBAClB,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,MAAM,CAAC,MAAM;gBACpB,QAAQ,EAAE;oBACR,UAAU,EAAE,MAAM,CAAC,MAAM;oBACzB,UAAU,EAAE,CAAC,EAAE,oBAAoB;oBACnC,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC;oBAChD,GAAG,GAAG,CAAC,QAAQ;iBAChB;aACF,CAAC,CAAC;QACL,CAAC;QAED,sBAAsB;QACtB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,CAAC,QAAQ,CAAC,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5C,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,GAAa;QACzC,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO;aACzB,KAAK,CAAC,iBAAiB,CAAC;aACxB,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAElD,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,kBAAkB,CAAC,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;QACjD,CAAC;QAED,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YACvD,MAAM,OAAO,GAAG,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC;YAC3C,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,EAAE,UAAU,CAAC,EAAE,CAAC;YAEvC,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,OAAO;gBACX,UAAU,EAAE,GAAG,CAAC,EAAE;gBAClB,OAAO,EAAE,OAAO;gBAChB,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE;oBACR,UAAU,EAAE,CAAC;oBACb,
UAAU,EAAE,QAAQ,CAAC,MAAM;oBAC3B,SAAS;oBACT,OAAO;oBACP,gBAAgB,EAAE,UAAU;oBAC5B,GAAG,GAAG,CAAC,QAAQ;iBAChB;aACF,CAAC,CAAC;YAEH,MAAM,GAAG,OAAO,CAAC;QACnB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACrC,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,KAAK,CAAC;QAC7C,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC;YAAE,OAAO,UAAU,CAAC;QACpF,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,MAAM,CAAC;QAC7E,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;YAAE,OAAO,MAAM,CAAC;QAC/C,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,MAAM,CAAC;QAC9C,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
@@ -232,6 +232,26 @@ export interface IngestionOptions {
232
232
  batchSize?: number;
233
233
  /** Loader configuration */
234
234
  loaderConfig?: LoaderConfig;
235
+ /** Enable MarkItDown preprocessing for non-text documents */
236
+ enableDocumentConversion?: boolean;
237
+ /** Conversion timeout in milliseconds */
238
+ conversionTimeoutMs?: number;
239
+ /** Conversion file size limit in bytes */
240
+ conversionMaxFileSize?: number;
241
+ /** Enable LLM image descriptions during conversion */
242
+ enableLLMDescriptions?: boolean;
243
+ /** Chunking strategy */
244
+ chunkingStrategy?: 'semantic' | 'fixed';
245
+ /** Fixed chunk size (characters) */
246
+ fixedChunkSize?: number;
247
+ /** Fixed chunk overlap (characters) */
248
+ fixedChunkOverlap?: number;
249
+ /** Callback for conversion errors */
250
+ onConversionError?: (error: {
251
+ file: string;
252
+ error: string;
253
+ errorType?: string;
254
+ }) => void;
235
255
  /** Progress callback */
236
256
  onProgress?: (current: number, total: number, details?: {
237
257
  currentFile: string;
@@ -246,6 +266,10 @@ export interface IngestionOptions {
246
266
  export interface IngestionResult {
247
267
  /** Number of documents processed */
248
268
  documentsProcessed: number;
269
+ /** Number of input files successfully processed */
270
+ successCount: number;
271
+ /** Number of input files that failed processing */
272
+ failureCount: number;
249
273
  /** Number of chunks generated */
250
274
  chunksGenerated: number;
251
275
  /** Errors during ingestion */
@@ -253,6 +277,17 @@ export interface IngestionResult {
253
277
  file: string;
254
278
  error: string;
255
279
  }>;
280
+ /** Performance and resource metrics */
281
+ metrics?: {
282
+ /** Average conversion duration per file in milliseconds */
283
+ averageConversionMs?: number;
284
+ /** Documents converted per second */
285
+ documentsPerSecond?: number;
286
+ /** Peak heap usage in MB during ingestion */
287
+ peakHeapUsedMb?: number;
288
+ /** Warnings generated during ingestion */
289
+ warnings?: string[];
290
+ };
256
291
  /** Total duration in milliseconds */
257
292
  durationMs: number;
258
293
  }
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,wBAAwB;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,uBAAuB;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,8BAA8B;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,IAAI,EAAE,KAAK,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAC9D,yBAAyB;IACzB,SAAS,EAAE,IAAI,CAAC;IAChB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,IAAI,CAAC;IACjB,yBAAyB;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qBAAqB;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,uBAAuB;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,yBAAyB;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,QAAQ,EAAE,aAAa,CAAC;IACxB,uBAAuB;IACvB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,kBAAkB;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kDAAkD;IAClD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,iCAAiC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,gCAAgC;IAChC,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IACjE,gCAAgC;IAChC,mBAAmB,EAAE,MAAM,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,+BAA+B;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,mCAAmC;IACnC,KAAK,CAAC
,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACtE,+BAA+B;IAC/B,aAAa,IAAI,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,sBAAsB;IACtB,cAAc,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,KAAK,CAAC;IAChD,2CAA2C;IAC3C,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iCAAiC;IACjC,YAAY,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,mCAAmC;IACnC,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,KAAK,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IACnG,6BAA6B;IAC7B,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9C,sBAAsB;IACtB,cAAc,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5E,yBAAyB;IACzB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC,CAAC;IACvD,0BAA0B;IAC1B,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yBAAyB;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,eAAe;IACf,QAAQ,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,CAAC;IACnE,uBAAuB;IACvB,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8BAA8B;IAC9B,QAAQ,EAAE,aAAa,CAAC;IACxB,qDAAqD;IACrD,KAAK,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,iCAAiC;IACjC,WAAW,EAAE,iBAAiB,CAAC;IAC/B,8BAA8B;IAC9B,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,2BAA2B;IAC3B,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,eAAe,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,oCAAoC;IACpC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yCAAyC;IACzC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,+CAA+C;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uBAAuB;IACvB,MAAM,CAAC,EAAE,cAAc,CAAC;IACxB,kCAAkC;IAClC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,qBAAqB;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;
AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,qBAAqB;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,wBAAwB;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,QAAQ,EAAE;QACR,6BAA6B;QAC7B,YAAY,EAAE,MAAM,CAAC;QACrB,iCAAiC;QACjC,UAAU,EAAE,MAAM,CAAC;QACnB,8BAA8B;QAC9B,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,wBAAwB;IACxB,UAAU,CAAC,EAAE,CACX,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;QACR,WAAW,EAAE,MAAM,CAAC;QACpB,kBAAkB,EAAE,MAAM,CAAC;QAC3B,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,EAAE,MAAM,CAAC;KACzB,KACE,IAAI,CAAC;CACX;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,oCAAoC;IACpC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,8BAA8B;IAC9B,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/C,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;CACpB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,wBAAwB;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,uBAAuB;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,8BAA8B;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,IAAI,EAAE,KAAK,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;IAC9D,yBAAyB;IACzB,SAAS,EAAE,IAAI,CAAC;IAChB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,IAAI,CAAC;IACjB,yBAAyB;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qBAAqB;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,uBAAuB;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,yBAAyB;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,QAAQ,EAAE,aAAa,CAAC;IACxB,uBAAuB;IACvB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,kBAAkB;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kDAAkD;IAClD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,yBAAyB;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,iCAAiC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,+BAA+B;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,6BAA6B;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,gCAAgC;IAChC,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;IACjE,gCAAgC;IAChC,mBAAmB,EAAE,MAAM,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,+BAA+B;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,mCAAmC;IACnC,KAAK,CAAC
,KAAK,EAAE,MAAM,EAAE,EAAE,MAAM,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACtE,+BAA+B;IAC/B,aAAa,IAAI,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,4BAA4B;IAC5B,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,sBAAsB;IACtB,cAAc,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,KAAK,CAAC;IAChD,2CAA2C;IAC3C,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iCAAiC;IACjC,YAAY,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,mCAAmC;IACnC,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,EAAE,KAAK,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IACnG,6BAA6B;IAC7B,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9C,sBAAsB;IACtB,cAAc,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5E,yBAAyB;IACzB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC,CAAC;IACvD,0BAA0B;IAC1B,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yBAAyB;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,eAAe;IACf,QAAQ,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,CAAC;IACnE,uBAAuB;IACvB,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8BAA8B;IAC9B,QAAQ,EAAE,aAAa,CAAC;IACxB,qDAAqD;IACrD,KAAK,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,iCAAiC;IACjC,WAAW,EAAE,iBAAiB,CAAC;IAC/B,8BAA8B;IAC9B,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,2BAA2B;IAC3B,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,8BAA8B;IAC9B,SAAS,CAAC,EAAE,eAAe,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,oCAAoC;IACpC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yCAAyC;IACzC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gCAAgC;IAChC,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,+CAA+C;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uBAAuB;IACvB,MAAM,CAAC,EAAE,cAAc,CAAC;IACxB,kCAAkC;IAClC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,qBAAqB;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;
AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,qBAAqB;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,qBAAqB;IACrB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,wBAAwB;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,QAAQ,EAAE;QACR,6BAA6B;QAC7B,YAAY,EAAE,MAAM,CAAC;QACrB,iCAAiC;QACjC,UAAU,EAAE,MAAM,CAAC;QACnB,8BAA8B;QAC9B,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gCAAgC;IAChC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,6DAA6D;IAC7D,wBAAwB,CAAC,EAAE,OAAO,CAAC;IACnC,yCAAyC;IACzC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,0CAA0C;IAC1C,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,sDAAsD;IACtD,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,wBAAwB;IACxB,gBAAgB,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC;IACxC,oCAAoC;IACpC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uCAAuC;IACvC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qCAAqC;IACrC,iBAAiB,CAAC,EAAE,CAAC,KAAK,EAAE;QAC1B,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,KAAK,IAAI,CAAC;IACX,wBAAwB;IACxB,UAAU,CAAC,EAAE,CACX,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;QACR,WAAW,EAAE,MAAM,CAAC;QACpB,kBAAkB,EAAE,MAAM,CAAC;QAC3B,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,EAAE,MAAM,CAAC;KACzB,KACE,IAAI,CAAC;CACX;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,oCAAoC;IACpC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;IACrB,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;IACrB,iCAAiC;IACjC,eAAe,EAAE,MAAM,CAAC;IACxB,8BAA8B;IAC9B,MAAM,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/C,uCAAuC;IACvC,OAAO,CAAC,EAAE;QACR,2DAA2D;QAC3D,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,qCAAqC;QACrC,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,6CAA6C;QAC7C,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,0CAA0C;QAC1C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;IACF,qCAAqC;IACrC,UAAU,EAAE,MAAM,CAAC;CACpB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dcyfr/ai-rag",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "RAG (Retrieval-Augmented Generation) system template - DCYFR AI starter",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -77,7 +77,7 @@
77
77
  "zod": "^3.22.4"
78
78
  },
79
79
  "peerDependencies": {
80
- "@dcyfr/ai": "^1.0.0",
80
+ "@dcyfr/ai": "^2.1.0",
81
81
  "chromadb": "^1.8.0"
82
82
  },
83
83
  "peerDependenciesMeta": {
@@ -90,14 +90,13 @@
90
90
  },
91
91
  "devDependencies": {
92
92
  "@changesets/changelog-github": "^0.5.2",
93
- "@changesets/cli": "^2.29.8",
94
- "@types/node": "^20.11.0",
95
- "@typescript-eslint/eslint-plugin": "^6.19.0",
96
- "@typescript-eslint/parser": "^6.19.0",
93
+ "@changesets/cli": "^2.30.0",
94
+ "@types/node": "^22.0.0",
97
95
  "@vitest/coverage-v8": "^4.0.18",
98
- "eslint": "^8.56.0",
96
+ "eslint": "^9.39.2",
99
97
  "tsx": "^4.7.0",
100
98
  "typescript": "^5.3.3",
99
+ "typescript-eslint": "^8.56.0",
101
100
  "vitest": "^4.0.18"
102
101
  },
103
102
  "engines": {