@memvid/sdk 2.0.155 → 2.0.156

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ /**
2
+ * High-performance batch image ingestion for Memvid SDK (Node.js).
3
+ *
4
+ * Uses OCR to extract text from images, then ingests into a .mv2 memory file.
5
+ * docTR (via Python) provides highest accuracy (85.3%), Tesseract.js is available as optional dependency.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { ImageIngestor } from '@memvid/sdk';
10
+ *
11
+ * // First install tesseract.js: npm install tesseract.js
12
+ * const ingestor = new ImageIngestor({
13
+ * ocrProvider: 'tesseract',
14
+ * workers: 4,
15
+ * });
16
+ *
17
+ * const result = await ingestor.ingestDirectory(
18
+ * './construction_drawings/',
19
+ * './project.mv2',
20
+ * {
21
+ * patterns: ['*.png', '*.jpg'],
22
+ * onProgress: (done, total) => console.log(`${done}/${total}`),
23
+ * }
24
+ * );
25
+ *
26
+ * console.log(`Processed ${result.totalImages} images`);
27
+ * await ingestor.terminate();
28
+ * ```
29
+ *
30
+ * For highest accuracy (85.3%), use docTR via Python:
31
+ * ```typescript
32
+ * // Requires: pip install python-doctr[torch]
33
+ * const ingestor = new ImageIngestor({ ocrProvider: 'doctr' });
34
+ * ```
35
+ */
36
+ import { OCRProviderType } from './ocr';
37
/**
 * Settings shared by every image-ingestion entry point.
 *
 * All members are optional; the defaults listed below are the ones applied
 * by the ingestor when a member is omitted.
 */
export interface ImageIngestOptions {
    /**
     * Number of images handled per batch.
     * @defaultValue 10
     */
    batchSize?: number;
    /**
     * Whether a fallback OCR pass is attempted when confidence is low.
     * @defaultValue true
     */
    fallbackOcr?: boolean;
    /**
     * Label assigned to the ingested frames.
     * @defaultValue 'image-extract'
     */
    label?: string;
    /** Extra metadata attached to every ingested frame. */
    metadata?: Record<string, unknown>;
    /**
     * Lowest OCR confidence (0-1) that is still accepted.
     * @defaultValue 0.3
     */
    minConfidence?: number;
}
52
/**
 * Settings for ingesting a whole directory of images.
 *
 * Extends {@link ImageIngestOptions} with file discovery and progress
 * reporting.
 */
export interface DirectoryIngestOptions extends ImageIngestOptions {
    /**
     * Invoked with the number of images completed so far and the total
     * number of images discovered.
     */
    onProgress?: (completed: number, total: number) => void;
    /**
     * Glob patterns selecting which files to include.
     * @defaultValue ['*.png', '*.jpg', '*.jpeg', '*.tiff']
     */
    patterns?: string[];
    /**
     * Whether subdirectories are searched as well.
     * @defaultValue true
     */
    recursive?: boolean;
}
63
/**
 * Settings for ingesting an explicit list of image paths.
 *
 * Extends {@link ImageIngestOptions} with progress reporting.
 */
export interface ImagesIngestOptions extends ImageIngestOptions {
    /**
     * Invoked with the number of images completed so far and the total
     * number of images supplied.
     */
    onProgress?: (completed: number, total: number) => void;
}
70
/**
 * Summary returned once a batch image ingestion has finished.
 */
export interface ImageIngestResult {
    /** Number of images that were submitted for processing. */
    totalImages: number;
    /** Number of images that were ingested successfully. */
    successful: number;
    /** Number of images that could not be ingested. */
    failed: number;
    /** One entry per failed image: its path and the reason it failed. */
    errors: {
        path: string;
        error: string;
    }[];
    /** Total number of chunks/frames created. */
    totalChunks: number;
    /** Wall-clock processing time, in seconds. */
    elapsedSeconds: number;
    /** Throughput: images processed per second. */
    imagesPerSecond: number;
    /** Size of the output file, in bytes. */
    outputSizeBytes: number;
    /** Size of the output file, in megabytes. */
    outputSizeMb: number;
}
96
/**
 * Options accepted by the {@link ImageIngestor} constructor.
 */
export interface ImageIngestorOptions {
    /**
     * Which OCR provider to use: 'tesseract', 'doctr', or 'easyocr'.
     * @defaultValue 'tesseract'
     */
    ocrProvider?: OCRProviderType;
    /** Path to the Python interpreter used by the doctr/easyocr providers. */
    pythonPath?: string;
    /**
     * Number of parallel workers.
     * @defaultValue the CPU count
     */
    workers?: number;
}
107
/**
 * Batch image ingestor for Memvid: runs OCR over a collection of images and
 * writes the extracted text into a .mv2 memory file.
 *
 * Reported OCR accuracy (measured on construction drawings):
 * - docTR (Python): 85.3% (best)
 * - EasyOCR (Python): 79.4%
 * - Tesseract.js: ~50-60%
 *
 * @example
 * ```typescript
 * const ingestor = new ImageIngestor({ ocrProvider: 'doctr', workers: 8 });
 * try {
 *   const result = await ingestor.ingestDirectory('./drawings/', './output.mv2');
 *   console.log(`Processed ${result.totalImages} images in ${result.elapsedSeconds}s`);
 * } finally {
 *   await ingestor.terminate();
 * }
 * ```
 */
export declare class ImageIngestor {
    private _ocrType;
    private _workers;
    private _fallbackOcr;
    private _ocr;
    /**
     * @param options - Provider selection, worker count and Python path.
     */
    constructor(options?: ImageIngestorOptions);
    /** Number of parallel workers configured for this ingestor. */
    get workers(): number;
    /** Name of the primary OCR provider. */
    get ocrName(): string;
    /**
     * Ingest every matching image found in a directory.
     *
     * @param directory - Directory to scan for images
     * @param outputPath - Path of the .mv2 file to write
     * @param options - Directory ingestion options
     * @returns Promise resolving to the ingestion summary
     *
     * @example
     * ```typescript
     * const result = await ingestor.ingestDirectory(
     *   './construction_drawings/',
     *   './project.mv2',
     *   {
     *     patterns: ['*.png', '*.jpg'],
     *     recursive: true,
     *     onProgress: (d, t) => console.log(`${d}/${t}`),
     *   }
     * );
     * ```
     */
    ingestDirectory(directory: string, outputPath: string, options?: DirectoryIngestOptions): Promise<ImageIngestResult>;
    /**
     * Ingest an explicit list of images into a .mv2 file.
     *
     * @param paths - Image file paths to process
     * @param outputPath - Path of the .mv2 file to write
     * @param options - Ingestion options
     * @returns Promise resolving to the ingestion summary
     *
     * @example
     * ```typescript
     * const result = await ingestor.ingestImages(
     *   ['img1.png', 'img2.png'],
     *   './output.mv2',
     *   { onProgress: (d, t) => console.log(`${d}/${t}`) }
     * );
     * ```
     */
    ingestImages(paths: string[], outputPath: string, options?: ImagesIngestOptions): Promise<ImageIngestResult>;
    /**
     * Release OCR worker resources.
     *
     * Call once the ingestor is no longer needed so memory can be freed.
     */
    terminate(): Promise<void>;
    /** Extracts text from a single image, with fallback support. */
    private _extractText;
}
192
/**
 * One-shot helper for ingesting a list of images.
 *
 * Builds an ImageIngestor from the provider-related options, processes the
 * images, and cleans the ingestor up automatically.
 *
 * @param paths - Image file paths to process
 * @param outputPath - Path of the .mv2 file to write
 * @param options - Ingestion options plus the ingestor's constructor options
 * @returns Promise resolving to the ingestion summary
 *
 * @example
 * ```typescript
 * import { ingestImages } from '@memvid/sdk';
 *
 * const result = await ingestImages(
 *   ['img1.png', 'img2.png'],
 *   './output.mv2',
 *   {
 *     ocrProvider: 'doctr',
 *     onProgress: (d, t) => console.log(`${d}/${t}`),
 *   }
 * );
 * ```
 */
export declare function ingestImages(
    paths: string[],
    outputPath: string,
    options?: {
        pythonPath?: string;
        workers?: number;
        ocrProvider?: OCRProviderType;
    } & ImagesIngestOptions,
): Promise<ImageIngestResult>;
221
/**
 * One-shot helper for ingesting a directory of images.
 *
 * Builds an ImageIngestor from the provider-related options, processes the
 * directory, and cleans the ingestor up automatically.
 *
 * @param directory - Directory to scan for images
 * @param outputPath - Path of the .mv2 file to write
 * @param options - Directory ingestion options plus the ingestor's constructor options
 * @returns Promise resolving to the ingestion summary
 *
 * @example
 * ```typescript
 * import { ingestDirectory } from '@memvid/sdk';
 *
 * const result = await ingestDirectory(
 *   './construction_drawings/',
 *   './project.mv2',
 *   {
 *     ocrProvider: 'doctr',
 *     patterns: ['*.png', '*.jpg'],
 *     onProgress: (d, t) => console.log(`${d}/${t}`),
 *   }
 * );
 * ```
 */
export declare function ingestDirectory(
    directory: string,
    outputPath: string,
    options?: {
        pythonPath?: string;
        workers?: number;
        ocrProvider?: OCRProviderType;
    } & DirectoryIngestOptions,
): Promise<ImageIngestResult>;
@@ -0,0 +1,411 @@
1
+ "use strict";
2
+ /**
3
+ * High-performance batch image ingestion for Memvid SDK (Node.js).
4
+ *
5
+ * Uses OCR to extract text from images, then ingests into a .mv2 memory file.
6
+ * docTR (via Python) provides highest accuracy (85.3%), Tesseract.js is available as optional dependency.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * import { ImageIngestor } from '@memvid/sdk';
11
+ *
12
+ * // First install tesseract.js: npm install tesseract.js
13
+ * const ingestor = new ImageIngestor({
14
+ * ocrProvider: 'tesseract',
15
+ * workers: 4,
16
+ * });
17
+ *
18
+ * const result = await ingestor.ingestDirectory(
19
+ * './construction_drawings/',
20
+ * './project.mv2',
21
+ * {
22
+ * patterns: ['*.png', '*.jpg'],
23
+ * onProgress: (done, total) => console.log(`${done}/${total}`),
24
+ * }
25
+ * );
26
+ *
27
+ * console.log(`Processed ${result.totalImages} images`);
28
+ * await ingestor.terminate();
29
+ * ```
30
+ *
31
+ * For highest accuracy (85.3%), use docTR via Python:
32
+ * ```typescript
33
+ * // Requires: pip install python-doctr[torch]
34
+ * const ingestor = new ImageIngestor({ ocrProvider: 'doctr' });
35
+ * ```
36
+ */
37
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
38
+ if (k2 === undefined) k2 = k;
39
+ var desc = Object.getOwnPropertyDescriptor(m, k);
40
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
41
+ desc = { enumerable: true, get: function() { return m[k]; } };
42
+ }
43
+ Object.defineProperty(o, k2, desc);
44
+ }) : (function(o, m, k, k2) {
45
+ if (k2 === undefined) k2 = k;
46
+ o[k2] = m[k];
47
+ }));
48
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
49
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
50
+ }) : function(o, v) {
51
+ o["default"] = v;
52
+ });
53
+ var __importStar = (this && this.__importStar) || (function () {
54
+ var ownKeys = function(o) {
55
+ ownKeys = Object.getOwnPropertyNames || function (o) {
56
+ var ar = [];
57
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
58
+ return ar;
59
+ };
60
+ return ownKeys(o);
61
+ };
62
+ return function (mod) {
63
+ if (mod && mod.__esModule) return mod;
64
+ var result = {};
65
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
66
+ __setModuleDefault(result, mod);
67
+ return result;
68
+ };
69
+ })();
70
+ Object.defineProperty(exports, "__esModule", { value: true });
71
+ exports.ImageIngestor = void 0;
72
+ exports.ingestImages = ingestImages;
73
+ exports.ingestDirectory = ingestDirectory;
74
+ const path = __importStar(require("path"));
75
+ const fs = __importStar(require("fs/promises"));
76
+ const os = __importStar(require("os"));
77
+ const ocr_1 = require("./ocr");
78
+ /**
79
+ * High-performance batch image ingestor for Memvid.
80
+ *
81
+ * Combines OCR text extraction with parallel processing for fast, accurate
82
+ * ingestion of large image collections.
83
+ *
84
+ * OCR Accuracy (tested on construction drawings):
85
+ * - docTR (Python): 85.3% - BEST
86
+ * - EasyOCR (Python): 79.4%
87
+ * - Tesseract.js: ~50-60%
88
+ *
89
+ * @example
90
+ * ```typescript
91
+ * const ingestor = new ImageIngestor({
92
+ * ocrProvider: 'doctr',
93
+ * workers: 8,
94
+ * });
95
+ *
96
+ * const result = await ingestor.ingestDirectory('./drawings/', './output.mv2');
97
+ * console.log(`Processed ${result.totalImages} images in ${result.elapsedSeconds}s`);
98
+ *
99
+ * await ingestor.terminate();
100
+ * ```
101
+ */
102
+ class ImageIngestor {
103
+ constructor(options = {}) {
104
+ this._fallbackOcr = null;
105
+ this._ocrType = options.ocrProvider ?? 'tesseract';
106
+ this._ocr = (0, ocr_1.getOCRProvider)(this._ocrType, {
107
+ pythonPath: options.pythonPath,
108
+ });
109
+ this._workers = options.workers ?? os.cpus().length;
110
+ // Initialize fallback OCR only for single-engine providers (not ensemble which already combines engines)
111
+ if (this._ocrType !== 'tesseract' && this._ocrType !== 'ensemble') {
112
+ this._fallbackOcr = new ocr_1.TesseractOCR();
113
+ }
114
+ }
115
+ /** Primary OCR provider name */
116
+ get ocrName() {
117
+ return this._ocr.name;
118
+ }
119
+ /** Number of parallel workers */
120
+ get workers() {
121
+ return this._workers;
122
+ }
123
+ /**
124
+ * Ingest multiple images into a .mv2 file.
125
+ *
126
+ * @param paths - Array of image file paths
127
+ * @param outputPath - Output .mv2 file path
128
+ * @param options - Ingestion options
129
+ * @returns Promise resolving to ingestion result
130
+ *
131
+ * @example
132
+ * ```typescript
133
+ * const result = await ingestor.ingestImages(
134
+ * ['img1.png', 'img2.png'],
135
+ * './output.mv2',
136
+ * { onProgress: (d, t) => console.log(`${d}/${t}`) }
137
+ * );
138
+ * ```
139
+ */
140
+ async ingestImages(paths, outputPath, options = {}) {
141
+ const startTime = Date.now();
142
+ const total = paths.length;
143
+ const batchSize = options.batchSize ?? 10;
144
+ const minConfidence = options.minConfidence ?? 0.3;
145
+ const label = options.label ?? 'image-extract';
146
+ const useFallback = options.fallbackOcr !== false;
147
+ let successful = 0;
148
+ let failed = 0;
149
+ let totalChunks = 0;
150
+ const errors = [];
151
+ // Dynamically import the SDK to avoid circular dependencies
152
+ const { create } = await Promise.resolve().then(() => __importStar(require('./index')));
153
+ // Create memory file with lex index enabled for text search
154
+ const mem = await create(outputPath, 'basic', { enableLex: true });
155
+ // Process images in batches
156
+ for (let batchStart = 0; batchStart < total; batchStart += batchSize) {
157
+ const batch = paths.slice(batchStart, batchStart + batchSize);
158
+ // Process batch with parallel OCR
159
+ const results = await Promise.allSettled(batch.map(async (imagePath) => {
160
+ const result = await this._extractText(imagePath, minConfidence, useFallback);
161
+ return { path: imagePath, result };
162
+ }));
163
+ // Collect results
164
+ const textsToIngest = [];
165
+ for (const settled of results) {
166
+ if (settled.status === 'fulfilled') {
167
+ const { path: filePath, result } = settled.value;
168
+ if (result.confidence >= minConfidence) {
169
+ textsToIngest.push({
170
+ text: result.text,
171
+ title: path.basename(filePath, path.extname(filePath)),
172
+ metadata: {
173
+ ...options.metadata,
174
+ sourceFile: filePath,
175
+ confidence: result.confidence,
176
+ regions: result.regions.length,
177
+ ocrProvider: result.metadata?.version ?? this._ocr.name,
178
+ },
179
+ });
180
+ successful++;
181
+ }
182
+ else {
183
+ errors.push({
184
+ path: filePath,
185
+ error: `Low confidence: ${(result.confidence * 100).toFixed(1)}%`,
186
+ });
187
+ failed++;
188
+ }
189
+ }
190
+ else {
191
+ failed++;
192
+ errors.push({
193
+ path: batch[results.indexOf(settled)] ?? 'unknown',
194
+ error: settled.reason?.message ?? 'Unknown error',
195
+ });
196
+ }
197
+ }
198
+ // Ingest batch into memory using putMany for speed
199
+ if (textsToIngest.length > 0) {
200
+ await mem.putMany(textsToIngest.map(item => ({
201
+ text: item.text,
202
+ title: item.title,
203
+ label,
204
+ metadata: item.metadata,
205
+ })));
206
+ totalChunks += textsToIngest.length;
207
+ }
208
+ // Progress callback
209
+ const completed = Math.min(batchStart + batch.length, total);
210
+ options.onProgress?.(completed, total);
211
+ }
212
+ // Finalize memory
213
+ await mem.seal();
214
+ const elapsedSeconds = (Date.now() - startTime) / 1000;
215
+ let outputSizeBytes = 0;
216
+ try {
217
+ const stats = await fs.stat(outputPath);
218
+ outputSizeBytes = stats.size;
219
+ }
220
+ catch {
221
+ // File may not exist if no successful ingestions
222
+ }
223
+ return {
224
+ totalImages: total,
225
+ successful,
226
+ failed,
227
+ totalChunks,
228
+ elapsedSeconds,
229
+ outputSizeBytes,
230
+ errors,
231
+ imagesPerSecond: elapsedSeconds > 0 ? total / elapsedSeconds : 0,
232
+ outputSizeMb: outputSizeBytes / (1024 * 1024),
233
+ };
234
+ }
235
+ /**
236
+ * Ingest all matching images from a directory.
237
+ *
238
+ * @param directory - Source directory path
239
+ * @param outputPath - Output .mv2 file path
240
+ * @param options - Directory ingestion options
241
+ * @returns Promise resolving to ingestion result
242
+ *
243
+ * @example
244
+ * ```typescript
245
+ * const result = await ingestor.ingestDirectory(
246
+ * './construction_drawings/',
247
+ * './project.mv2',
248
+ * {
249
+ * patterns: ['*.png', '*.jpg'],
250
+ * recursive: true,
251
+ * onProgress: (d, t) => console.log(`${d}/${t}`),
252
+ * }
253
+ * );
254
+ * ```
255
+ */
256
+ async ingestDirectory(directory, outputPath, options = {}) {
257
+ const patterns = options.patterns ?? ['*.png', '*.jpg', '*.jpeg', '*.tiff'];
258
+ const recursive = options.recursive ?? true;
259
+ // Convert glob patterns to extensions (e.g., '*.png' -> '.png')
260
+ const extensions = new Set(patterns.map((p) => {
261
+ const ext = p.replace(/^\*/, '').toLowerCase();
262
+ return ext.startsWith('.') ? ext : `.${ext}`;
263
+ }));
264
+ // Use native fs.readdir with recursive option (Node 18+)
265
+ const allFiles = await fs.readdir(directory, {
266
+ recursive,
267
+ withFileTypes: true,
268
+ });
269
+ // Filter to matching image files
270
+ const imagePaths = allFiles
271
+ .filter((entry) => {
272
+ if (!entry.isFile())
273
+ return false;
274
+ const ext = path.extname(entry.name).toLowerCase();
275
+ return extensions.has(ext);
276
+ })
277
+ .map((entry) => {
278
+ // entry.parentPath is available in Node 20+, fallback to entry.path for Node 18-19
279
+ const parentPath = entry.parentPath ?? entry.path ?? directory;
280
+ return path.join(parentPath, entry.name);
281
+ });
282
+ // Sort for deterministic ordering
283
+ const sortedPaths = imagePaths.sort();
284
+ return this.ingestImages(sortedPaths, outputPath, {
285
+ minConfidence: options.minConfidence,
286
+ fallbackOcr: options.fallbackOcr,
287
+ batchSize: options.batchSize,
288
+ metadata: options.metadata,
289
+ label: options.label,
290
+ onProgress: options.onProgress,
291
+ });
292
+ }
293
+ /**
294
+ * Extract text from a single image with fallback support.
295
+ */
296
+ async _extractText(imagePath, minConfidence, useFallback) {
297
+ try {
298
+ // Try primary OCR
299
+ const result = await this._ocr.extractText(imagePath);
300
+ // Check if confidence is acceptable
301
+ if (result.confidence >= minConfidence) {
302
+ return result;
303
+ }
304
+ // Try fallback if enabled and available
305
+ if (useFallback && this._fallbackOcr) {
306
+ const fallbackResult = await this._fallbackOcr.extractText(imagePath);
307
+ if (fallbackResult.confidence > result.confidence) {
308
+ return fallbackResult;
309
+ }
310
+ }
311
+ return result;
312
+ }
313
+ catch (error) {
314
+ // Fallback on error
315
+ if (useFallback && this._fallbackOcr) {
316
+ return this._fallbackOcr.extractText(imagePath);
317
+ }
318
+ throw error;
319
+ }
320
+ }
321
+ /**
322
+ * Clean up OCR worker resources.
323
+ *
324
+ * Call this when done using the ingestor to free memory.
325
+ */
326
+ async terminate() {
327
+ if (this._ocr.terminate) {
328
+ await this._ocr.terminate();
329
+ }
330
+ if (this._fallbackOcr) {
331
+ await this._fallbackOcr.terminate();
332
+ this._fallbackOcr = null;
333
+ }
334
+ }
335
+ }
336
+ exports.ImageIngestor = ImageIngestor;
337
+ /**
338
+ * Convenience function for quick image ingestion.
339
+ *
340
+ * Creates an ImageIngestor, processes images, and cleans up automatically.
341
+ *
342
+ * @param paths - Array of image file paths
343
+ * @param outputPath - Output .mv2 file path
344
+ * @param options - Ingestion options
345
+ * @returns Promise resolving to ingestion result
346
+ *
347
+ * @example
348
+ * ```typescript
349
+ * import { ingestImages } from 'memvid-sdk';
350
+ *
351
+ * const result = await ingestImages(
352
+ * ['img1.png', 'img2.png'],
353
+ * './output.mv2',
354
+ * {
355
+ * ocrProvider: 'doctr',
356
+ * onProgress: (d, t) => console.log(`${d}/${t}`),
357
+ * }
358
+ * );
359
+ * ```
360
+ */
361
+ async function ingestImages(paths, outputPath, options = {}) {
362
+ const ingestor = new ImageIngestor({
363
+ ocrProvider: options.ocrProvider,
364
+ workers: options.workers,
365
+ pythonPath: options.pythonPath,
366
+ });
367
+ try {
368
+ return await ingestor.ingestImages(paths, outputPath, options);
369
+ }
370
+ finally {
371
+ await ingestor.terminate();
372
+ }
373
+ }
374
+ /**
375
+ * Convenience function for quick directory ingestion.
376
+ *
377
+ * Creates an ImageIngestor, processes directory, and cleans up automatically.
378
+ *
379
+ * @param directory - Source directory path
380
+ * @param outputPath - Output .mv2 file path
381
+ * @param options - Directory ingestion options
382
+ * @returns Promise resolving to ingestion result
383
+ *
384
+ * @example
385
+ * ```typescript
386
+ * import { ingestDirectory } from 'memvid-sdk';
387
+ *
388
+ * const result = await ingestDirectory(
389
+ * './construction_drawings/',
390
+ * './project.mv2',
391
+ * {
392
+ * ocrProvider: 'doctr',
393
+ * patterns: ['*.png', '*.jpg'],
394
+ * onProgress: (d, t) => console.log(`${d}/${t}`),
395
+ * }
396
+ * );
397
+ * ```
398
+ */
399
+ async function ingestDirectory(directory, outputPath, options = {}) {
400
+ const ingestor = new ImageIngestor({
401
+ ocrProvider: options.ocrProvider,
402
+ workers: options.workers,
403
+ pythonPath: options.pythonPath,
404
+ });
405
+ try {
406
+ return await ingestor.ingestDirectory(directory, outputPath, options);
407
+ }
408
+ finally {
409
+ await ingestor.terminate();
410
+ }
411
+ }