npm - @memvid/sdk - Versions diffs - 2.0.155 → 2.0.156 - Mend

@memvid/sdk 2.0.155 → 2.0.156

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/image-ingest.d.ts ADDED Viewed

@@ -0,0 +1,250 @@
+/**
+ * High-performance batch image ingestion for Memvid SDK (Node.js).
+ *
+ * Uses OCR to extract text from images, then ingests into a .mv2 memory file.
+ * docTR (via Python) provides the highest accuracy (85.3%); Tesseract.js is available as an optional dependency.
+ *
+ * @example
+ * ```typescript
+ * import { ImageIngestor } from '@memvid/sdk';
+ *
+ * // First install tesseract.js: npm install tesseract.js
+ * const ingestor = new ImageIngestor({
+ *   ocrProvider: 'tesseract',
+ *   workers: 4,
+ * });
+ *
+ * const result = await ingestor.ingestDirectory(
+ *   './construction_drawings/',
+ *   './project.mv2',
+ *   {
+ *     patterns: ['*.png', '*.jpg'],
+ *     onProgress: (done, total) => console.log(`${done}/${total}`),
+ *   }
+ * );
+ *
+ * console.log(`Processed ${result.totalImages} images`);
+ * await ingestor.terminate();
+ * ```
+ *
+ * For highest accuracy (85.3%), use docTR via Python:
+ * ```typescript
+ * // Requires: pip install python-doctr[torch]
+ * const ingestor = new ImageIngestor({ ocrProvider: 'doctr' });
+ * ```
+ */
+import { OCRProviderType } from './ocr';
+/**
+ * Options shared by every image ingestion entry point.
+ */
+export interface ImageIngestOptions {
+    /** Minimum OCR confidence threshold (0-1). Images whose best result is below it are counted as failed and listed in `errors` instead of being ingested. Default: 0.3 */
+    minConfidence?: number;
+    /** Try the fallback OCR engine when the primary result is below `minConfidence` or the primary engine throws. Default: true */
+    fallbackOcr?: boolean;
+    /** Images to process per batch; all images of one batch are OCR'd concurrently and written together. Default: 10 */
+    batchSize?: number;
+    /** Metadata to attach to all ingested frames */
+    metadata?: Record<string, unknown>;
+    /** Label for ingested frames. Default: 'image-extract' */
+    label?: string;
+}
+/**
+ * Options for directory ingestion (adds file discovery settings to the shared options).
+ */
+export interface DirectoryIngestOptions extends ImageIngestOptions {
+    /** File patterns to include. Each pattern is reduced to a file extension that is matched case-insensitively, so '*.png' selects every .png file. Default: ['*.png', '*.jpg', '*.jpeg', '*.tiff'] */
+    patterns?: string[];
+    /** Search subdirectories. Default: true */
+    recursive?: boolean;
+    /** Progress callback, invoked after each batch with the number of images handled so far and the total */
+    onProgress?: (completed: number, total: number) => void;
+}
+/**
+ * Options for array-based ingestion (an explicit list of image paths).
+ */
+export interface ImagesIngestOptions extends ImageIngestOptions {
+    /** Progress callback, invoked after each batch with the number of images handled so far and the total */
+    onProgress?: (completed: number, total: number) => void;
+}
+/**
+ * Summary returned by a batch image ingestion run.
+ */
+export interface ImageIngestResult {
+    /** Number of image paths submitted (successful + failed) */
+    totalImages: number;
+    /** Successfully ingested images */
+    successful: number;
+    /** Failed images: OCR errors plus results below the confidence threshold */
+    failed: number;
+    /** Total chunks/frames created (one per successfully ingested image) */
+    totalChunks: number;
+    /** Processing time in seconds */
+    elapsedSeconds: number;
+    /** Output file size in bytes; 0 when the output file could not be inspected */
+    outputSizeBytes: number;
+    /** One entry per failed image: its path and a human-readable reason */
+    errors: Array<{
+        path: string;
+        error: string;
+    }>;
+    /** totalImages divided by elapsedSeconds (0 when no time elapsed) */
+    imagesPerSecond: number;
+    /** Output size in MB (outputSizeBytes / 1024 / 1024) */
+    outputSizeMb: number;
+}
+/**
+ * Constructor options for ImageIngestor.
+ */
+export interface ImageIngestorOptions {
+    /** OCR provider, e.g. 'tesseract', 'doctr', or 'easyocr' (see `OCRProviderType` for every accepted value). Default: 'tesseract' */
+    ocrProvider?: OCRProviderType;
+    /** Number of parallel workers. Default: CPU count */
+    workers?: number;
+    /** Python path handed to the OCR provider; relevant for the Python-based doctr/easyocr providers */
+    pythonPath?: string;
+}
+/**
+ * High-performance batch image ingestor for Memvid.
+ *
+ * Combines OCR text extraction with parallel processing for fast, accurate
+ * ingestion of large image collections.
+ *
+ * OCR Accuracy (tested on construction drawings):
+ *   - docTR (Python): 85.3% - BEST
+ *   - EasyOCR (Python): 79.4%
+ *   - Tesseract.js: ~50-60%
+ *
+ * Call `terminate()` once the ingestor is no longer needed so the OCR
+ * providers can release their resources.
+ *
+ * @example
+ * ```typescript
+ * const ingestor = new ImageIngestor({
+ *   ocrProvider: 'doctr',
+ *   workers: 8,
+ * });
+ *
+ * const result = await ingestor.ingestDirectory('./drawings/', './output.mv2');
+ * console.log(`Processed ${result.totalImages} images in ${result.elapsedSeconds}s`);
+ *
+ * await ingestor.terminate();
+ * ```
+ */
+export declare class ImageIngestor {
+    private _ocr;
+    private _fallbackOcr;
+    private _workers;
+    private _ocrType;
+    constructor(options?: ImageIngestorOptions);
+    /** Name reported by the primary OCR provider */
+    get ocrName(): string;
+    /** Configured number of parallel workers (CPU count when not specified) */
+    get workers(): number;
+    /**
+     * Ingest multiple images into a .mv2 file.
+     *
+     * Images are OCR'd batch by batch; results that reach the confidence
+     * threshold are written to the output file, all others are reported in
+     * the result's `errors` list.
+     *
+     * @param paths - Array of image file paths
+     * @param outputPath - Output .mv2 file path
+     * @param options - Ingestion options
+     * @returns Promise resolving to ingestion result
+     *
+     * @example
+     * ```typescript
+     * const result = await ingestor.ingestImages(
+     *   ['img1.png', 'img2.png'],
+     *   './output.mv2',
+     *   { onProgress: (d, t) => console.log(`${d}/${t}`) }
+     * );
+     * ```
+     */
+    ingestImages(paths: string[], outputPath: string, options?: ImagesIngestOptions): Promise<ImageIngestResult>;
+    /**
+     * Ingest all matching images from a directory.
+     *
+     * Matching files are collected, sorted by path for a deterministic order,
+     * and then processed exactly like `ingestImages`.
+     *
+     * @param directory - Source directory path
+     * @param outputPath - Output .mv2 file path
+     * @param options - Directory ingestion options
+     * @returns Promise resolving to ingestion result
+     *
+     * @example
+     * ```typescript
+     * const result = await ingestor.ingestDirectory(
+     *   './construction_drawings/',
+     *   './project.mv2',
+     *   {
+     *     patterns: ['*.png', '*.jpg'],
+     *     recursive: true,
+     *     onProgress: (d, t) => console.log(`${d}/${t}`),
+     *   }
+     * );
+     * ```
+     */
+    ingestDirectory(directory: string, outputPath: string, options?: DirectoryIngestOptions): Promise<ImageIngestResult>;
+    /**
+     * Extract text from a single image, consulting the fallback OCR engine
+     * when the primary engine fails or reports low confidence.
+     */
+    private _extractText;
+    /**
+     * Clean up OCR worker resources.
+     *
+     * Call this when done using the ingestor to free memory.
+     */
+    terminate(): Promise<void>;
+}
+/**
+ * Convenience function for quick image ingestion.
+ *
+ * Creates an ImageIngestor, processes images, and cleans up automatically
+ * (the ingestor is terminated whether ingestion succeeds or fails).
+ *
+ * @param paths - Array of image file paths
+ * @param outputPath - Output .mv2 file path
+ * @param options - Ingestion options plus the ImageIngestor constructor options
+ * @returns Promise resolving to ingestion result
+ *
+ * @example
+ * ```typescript
+ * import { ingestImages } from '@memvid/sdk';
+ *
+ * const result = await ingestImages(
+ *   ['img1.png', 'img2.png'],
+ *   './output.mv2',
+ *   {
+ *     ocrProvider: 'doctr',
+ *     onProgress: (d, t) => console.log(`${d}/${t}`),
+ *   }
+ * );
+ * ```
+ */
+export declare function ingestImages(paths: string[], outputPath: string, options?: ImagesIngestOptions & {
+    ocrProvider?: OCRProviderType;
+    workers?: number;
+    pythonPath?: string;
+}): Promise<ImageIngestResult>;
+/**
+ * Convenience function for quick directory ingestion.
+ *
+ * Creates an ImageIngestor, processes the directory, and cleans up automatically
+ * (the ingestor is terminated whether ingestion succeeds or fails).
+ *
+ * @param directory - Source directory path
+ * @param outputPath - Output .mv2 file path
+ * @param options - Directory ingestion options plus the ImageIngestor constructor options
+ * @returns Promise resolving to ingestion result
+ *
+ * @example
+ * ```typescript
+ * import { ingestDirectory } from '@memvid/sdk';
+ *
+ * const result = await ingestDirectory(
+ *   './construction_drawings/',
+ *   './project.mv2',
+ *   {
+ *     ocrProvider: 'doctr',
+ *     patterns: ['*.png', '*.jpg'],
+ *     onProgress: (d, t) => console.log(`${d}/${t}`),
+ *   }
+ * );
+ * ```
+ */
+export declare function ingestDirectory(directory: string, outputPath: string, options?: DirectoryIngestOptions & {
+    ocrProvider?: OCRProviderType;
+    workers?: number;
+    pythonPath?: string;
+}): Promise<ImageIngestResult>;

package/dist/image-ingest.js ADDED Viewed

@@ -0,0 +1,411 @@
+"use strict";
+/**
+ * High-performance batch image ingestion for Memvid SDK (Node.js).
+ *
+ * Uses OCR to extract text from images, then ingests into a .mv2 memory file.
+ * docTR (via Python) provides the highest accuracy (85.3%); Tesseract.js is available as an optional dependency.
+ *
+ * @example
+ * ```typescript
+ * import { ImageIngestor } from '@memvid/sdk';
+ *
+ * // First install tesseract.js: npm install tesseract.js
+ * const ingestor = new ImageIngestor({
+ *   ocrProvider: 'tesseract',
+ *   workers: 4,
+ * });
+ *
+ * const result = await ingestor.ingestDirectory(
+ *   './construction_drawings/',
+ *   './project.mv2',
+ *   {
+ *     patterns: ['*.png', '*.jpg'],
+ *     onProgress: (done, total) => console.log(`${done}/${total}`),
+ *   }
+ * );
+ *
+ * console.log(`Processed ${result.totalImages} images`);
+ * await ingestor.terminate();
+ * ```
+ *
+ * For highest accuracy (85.3%), use docTR via Python:
+ * ```typescript
+ * // Requires: pip install python-doctr[torch]
+ * const ingestor = new ImageIngestor({ ocrProvider: 'doctr' });
+ * ```
+ */
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // compiler-emitted interop helper: exposes m[k] on o under the name k2
+    if (k2 === undefined) k2 = k; // target name defaults to the source name
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } }; // getter reads m[k] on every access, so later changes stay visible
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // variant used when Object.create is unavailable: copies the current value
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // compiler-emitted interop helper: stores v as the `default` property of o
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { // variant used when Object.create is unavailable: plain assignment
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // compiler-emitted interop helper: turns a require() result into a namespace-style object
+    var ownKeys = function(o) { // replaces itself on first call with getOwnPropertyNames or the manual loop below
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // re-export every own key except "default"
+        __setModuleDefault(result, mod); // the original module object becomes `default`
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ImageIngestor = void 0;
+exports.ingestImages = ingestImages;
+exports.ingestDirectory = ingestDirectory;
+const path = __importStar(require("path"));
+const fs = __importStar(require("fs/promises"));
+const os = __importStar(require("os"));
+const ocr_1 = require("./ocr");
+/**
+ * High-performance batch image ingestor for Memvid.
+ *
+ * Combines OCR text extraction with parallel processing for fast, accurate
+ * ingestion of large image collections.
+ *
+ * OCR Accuracy (tested on construction drawings):
+ *   - docTR (Python): 85.3% - BEST
+ *   - EasyOCR (Python): 79.4%
+ *   - Tesseract.js: ~50-60%
+ *
+ * @example
+ * ```typescript
+ * const ingestor = new ImageIngestor({
+ *   ocrProvider: 'doctr',
+ *   workers: 8,
+ * });
+ *
+ * const result = await ingestor.ingestDirectory('./drawings/', './output.mv2');
+ * console.log(`Processed ${result.totalImages} images in ${result.elapsedSeconds}s`);
+ *
+ * await ingestor.terminate();
+ * ```
+ */
+class ImageIngestor {
+    constructor(options = {}) {
+        this._fallbackOcr = null;
+        this._ocrType = options.ocrProvider ?? 'tesseract';
+        this._ocr = (0, ocr_1.getOCRProvider)(this._ocrType, {
+            pythonPath: options.pythonPath,
+        });
+        this._workers = options.workers ?? os.cpus().length;
+        // Initialize fallback OCR only for single-engine providers (not ensemble which already combines engines)
+        if (this._ocrType !== 'tesseract' && this._ocrType !== 'ensemble') {
+            this._fallbackOcr = new ocr_1.TesseractOCR();
+        }
+    }
+    /** Primary OCR provider name */
+    get ocrName() {
+        return this._ocr.name;
+    }
+    /** Number of parallel workers */
+    get workers() {
+        return this._workers;
+    }
+    /**
+     * Ingest multiple images into a .mv2 file.
+     *
+     * @param paths - Array of image file paths
+     * @param outputPath - Output .mv2 file path
+     * @param options - Ingestion options
+     * @returns Promise resolving to ingestion result
+     *
+     * @example
+     * ```typescript
+     * const result = await ingestor.ingestImages(
+     *   ['img1.png', 'img2.png'],
+     *   './output.mv2',
+     *   { onProgress: (d, t) => console.log(`${d}/${t}`) }
+     * );
+     * ```
+     */
+    async ingestImages(paths, outputPath, options = {}) {
+        const startTime = Date.now();
+        const total = paths.length;
+        const batchSize = options.batchSize ?? 10;
+        const minConfidence = options.minConfidence ?? 0.3;
+        const label = options.label ?? 'image-extract';
+        const useFallback = options.fallbackOcr !== false;
+        let successful = 0;
+        let failed = 0;
+        let totalChunks = 0;
+        const errors = [];
+        // Dynamically import the SDK to avoid circular dependencies
+        const { create } = await Promise.resolve().then(() => __importStar(require('./index')));
+        // Create memory file with lex index enabled for text search
+        const mem = await create(outputPath, 'basic', { enableLex: true });
+        // Process images in batches
+        for (let batchStart = 0; batchStart < total; batchStart += batchSize) {
+            const batch = paths.slice(batchStart, batchStart + batchSize);
+            // Process batch with parallel OCR
+            const results = await Promise.allSettled(batch.map(async (imagePath) => {
+                const result = await this._extractText(imagePath, minConfidence, useFallback);
+                return { path: imagePath, result };
+            }));
+            // Collect results
+            const textsToIngest = [];
+            for (const settled of results) {
+                if (settled.status === 'fulfilled') {
+                    const { path: filePath, result } = settled.value;
+                    if (result.confidence >= minConfidence) {
+                        textsToIngest.push({
+                            text: result.text,
+                            title: path.basename(filePath, path.extname(filePath)),
+                            metadata: {
+                                ...options.metadata,
+                                sourceFile: filePath,
+                                confidence: result.confidence,
+                                regions: result.regions.length,
+                                ocrProvider: result.metadata?.version ?? this._ocr.name,
+                            },
+                        });
+                        successful++;
+                    }
+                    else {
+                        errors.push({
+                            path: filePath,
+                            error: `Low confidence: ${(result.confidence * 100).toFixed(1)}%`,
+                        });
+                        failed++;
+                    }
+                }
+                else {
+                    failed++;
+                    errors.push({
+                        path: batch[results.indexOf(settled)] ?? 'unknown',
+                        error: settled.reason?.message ?? 'Unknown error',
+                    });
+                }
+            }
+            // Ingest batch into memory using putMany for speed
+            if (textsToIngest.length > 0) {
+                await mem.putMany(textsToIngest.map(item => ({
+                    text: item.text,
+                    title: item.title,
+                    label,
+                    metadata: item.metadata,
+                })));
+                totalChunks += textsToIngest.length;
+            }
+            // Progress callback
+            const completed = Math.min(batchStart + batch.length, total);
+            options.onProgress?.(completed, total);
+        }
+        // Finalize memory
+        await mem.seal();
+        const elapsedSeconds = (Date.now() - startTime) / 1000;
+        let outputSizeBytes = 0;
+        try {
+            const stats = await fs.stat(outputPath);
+            outputSizeBytes = stats.size;
+        }
+        catch {
+            // File may not exist if no successful ingestions
+        }
+        return {
+            totalImages: total,
+            successful,
+            failed,
+            totalChunks,
+            elapsedSeconds,
+            outputSizeBytes,
+            errors,
+            imagesPerSecond: elapsedSeconds > 0 ? total / elapsedSeconds : 0,
+            outputSizeMb: outputSizeBytes / (1024 * 1024),
+        };
+    }
+    /**
+     * Ingest all matching images from a directory.
+     *
+     * @param directory - Source directory path
+     * @param outputPath - Output .mv2 file path
+     * @param options - Directory ingestion options
+     * @returns Promise resolving to ingestion result
+     *
+     * @example
+     * ```typescript
+     * const result = await ingestor.ingestDirectory(
+     *   './construction_drawings/',
+     *   './project.mv2',
+     *   {
+     *     patterns: ['*.png', '*.jpg'],
+     *     recursive: true,
+     *     onProgress: (d, t) => console.log(`${d}/${t}`),
+     *   }
+     * );
+     * ```
+     */
+    async ingestDirectory(directory, outputPath, options = {}) {
+        const patterns = options.patterns ?? ['*.png', '*.jpg', '*.jpeg', '*.tiff'];
+        const recursive = options.recursive ?? true;
+        // Convert glob patterns to extensions (e.g., '*.png' -> '.png')
+        const extensions = new Set(patterns.map((p) => {
+            const ext = p.replace(/^\*/, '').toLowerCase();
+            return ext.startsWith('.') ? ext : `.${ext}`;
+        }));
+        // Use native fs.readdir with recursive option (Node 18+)
+        const allFiles = await fs.readdir(directory, {
+            recursive,
+            withFileTypes: true,
+        });
+        // Filter to matching image files
+        const imagePaths = allFiles
+            .filter((entry) => {
+            if (!entry.isFile())
+                return false;
+            const ext = path.extname(entry.name).toLowerCase();
+            return extensions.has(ext);
+        })
+            .map((entry) => {
+            // entry.parentPath is available in Node 20+, fallback to entry.path for Node 18-19
+            const parentPath = entry.parentPath ?? entry.path ?? directory;
+            return path.join(parentPath, entry.name);
+        });
+        // Sort for deterministic ordering
+        const sortedPaths = imagePaths.sort();
+        return this.ingestImages(sortedPaths, outputPath, {
+            minConfidence: options.minConfidence,
+            fallbackOcr: options.fallbackOcr,
+            batchSize: options.batchSize,
+            metadata: options.metadata,
+            label: options.label,
+            onProgress: options.onProgress,
+        });
+    }
+    /**
+     * Extract text from a single image with fallback support.
+     */
+    async _extractText(imagePath, minConfidence, useFallback) {
+        try {
+            // Try primary OCR
+            const result = await this._ocr.extractText(imagePath);
+            // Check if confidence is acceptable
+            if (result.confidence >= minConfidence) {
+                return result;
+            }
+            // Try fallback if enabled and available
+            if (useFallback && this._fallbackOcr) {
+                const fallbackResult = await this._fallbackOcr.extractText(imagePath);
+                if (fallbackResult.confidence > result.confidence) {
+                    return fallbackResult;
+                }
+            }
+            return result;
+        }
+        catch (error) {
+            // Fallback on error
+            if (useFallback && this._fallbackOcr) {
+                return this._fallbackOcr.extractText(imagePath);
+            }
+            throw error;
+        }
+    }
+    /**
+     * Clean up OCR worker resources.
+     *
+     * Call this when done using the ingestor to free memory.
+     */
+    async terminate() {
+        if (this._ocr.terminate) {
+            await this._ocr.terminate();
+        }
+        if (this._fallbackOcr) {
+            await this._fallbackOcr.terminate();
+            this._fallbackOcr = null;
+        }
+    }
+}
+exports.ImageIngestor = ImageIngestor;
+/**
+ * Convenience function for quick image ingestion.
+ *
+ * Creates an ImageIngestor, processes images, and cleans up automatically.
+ *
+ * @param paths - Array of image file paths
+ * @param outputPath - Output .mv2 file path
+ * @param options - Ingestion options
+ * @returns Promise resolving to ingestion result
+ *
+ * @example
+ * ```typescript
+ * import { ingestImages } from 'memvid-sdk';
+ *
+ * const result = await ingestImages(
+ *   ['img1.png', 'img2.png'],
+ *   './output.mv2',
+ *   {
+ *     ocrProvider: 'doctr',
+ *     onProgress: (d, t) => console.log(`${d}/${t}`),
+ *   }
+ * );
+ * ```
+ */
+async function ingestImages(paths, outputPath, options = {}) {
+    const ingestor = new ImageIngestor({
+        ocrProvider: options.ocrProvider,
+        workers: options.workers,
+        pythonPath: options.pythonPath,
+    });
+    try {
+        return await ingestor.ingestImages(paths, outputPath, options);
+    }
+    finally {
+        await ingestor.terminate();
+    }
+}
+/**
+ * Convenience function for quick directory ingestion.
+ *
+ * Creates an ImageIngestor, processes directory, and cleans up automatically.
+ *
+ * @param directory - Source directory path
+ * @param outputPath - Output .mv2 file path
+ * @param options - Directory ingestion options
+ * @returns Promise resolving to ingestion result
+ *
+ * @example
+ * ```typescript
+ * import { ingestDirectory } from 'memvid-sdk';
+ *
+ * const result = await ingestDirectory(
+ *   './construction_drawings/',
+ *   './project.mv2',
+ *   {
+ *     ocrProvider: 'doctr',
+ *     patterns: ['*.png', '*.jpg'],
+ *     onProgress: (d, t) => console.log(`${d}/${t}`),
+ *   }
+ * );
+ * ```
+ */
+async function ingestDirectory(directory, outputPath, options = {}) {
+    const ingestor = new ImageIngestor({
+        ocrProvider: options.ocrProvider,
+        workers: options.workers,
+        pythonPath: options.pythonPath,
+    });
+    try {
+        return await ingestor.ingestDirectory(directory, outputPath, options);
+    }
+    finally {
+        await ingestor.terminate();
+    }
+}