@memvid/sdk 2.0.155 → 2.0.157

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -36,10 +36,14 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
36
36
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = exports.getEmbedder = exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.entities = exports.clip = void 0;
39
+ exports.getEmbedder = exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.ingestDirectory = exports.ingestImages = exports.ImageIngestor = exports.EasyOCRSubprocess = exports.DocTRSubprocess = exports.TesseractOCR = exports.PaddleOCR = exports.EnsembleOCR = exports.getOCRProvider = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.imageIngest = exports.ocr = exports.entities = exports.clip = void 0;
40
+ exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = void 0;
40
41
  exports.configure = configure;
41
42
  exports.getConfig = getConfig;
42
43
  exports.resetConfig = resetConfig;
44
+ exports.getAclScopeFromApiKey = getAclScopeFromApiKey;
45
+ exports.aclContextFromScope = aclContextFromScope;
46
+ exports.aclMetadataFromScope = aclMetadataFromScope;
43
47
  exports.validateConfig = validateConfig;
44
48
  exports.resolveMemory = resolveMemory;
45
49
  exports.createMemory = createMemory;
@@ -79,6 +83,9 @@ require("./adapters/mcp");
79
83
  exports.clip = __importStar(require("./clip"));
80
84
  exports.entities = __importStar(require("./entities"));
81
85
  __exportStar(require("./embeddings"), exports);
86
+ // Export OCR and image ingestion modules
87
+ exports.ocr = __importStar(require("./ocr"));
88
+ exports.imageIngest = __importStar(require("./image-ingest"));
82
89
  // Re-export provider factories for convenience
83
90
  var clip_1 = require("./clip");
84
91
  Object.defineProperty(exports, "getClipProvider", { enumerable: true, get: function () { return clip_1.getClipProvider; } });
@@ -91,6 +98,18 @@ Object.defineProperty(exports, "LocalNER", { enumerable: true, get: function ()
91
98
  Object.defineProperty(exports, "OpenAIEntities", { enumerable: true, get: function () { return entities_1.OpenAIEntities; } });
92
99
  Object.defineProperty(exports, "ClaudeEntities", { enumerable: true, get: function () { return entities_1.ClaudeEntities; } });
93
100
  Object.defineProperty(exports, "GeminiEntities", { enumerable: true, get: function () { return entities_1.GeminiEntities; } });
101
+ // Re-export OCR providers and image ingestion
102
+ var ocr_1 = require("./ocr");
103
+ Object.defineProperty(exports, "getOCRProvider", { enumerable: true, get: function () { return ocr_1.getOCRProvider; } });
104
+ Object.defineProperty(exports, "EnsembleOCR", { enumerable: true, get: function () { return ocr_1.EnsembleOCR; } });
105
+ Object.defineProperty(exports, "PaddleOCR", { enumerable: true, get: function () { return ocr_1.PaddleOCR; } });
106
+ Object.defineProperty(exports, "TesseractOCR", { enumerable: true, get: function () { return ocr_1.TesseractOCR; } });
107
+ Object.defineProperty(exports, "DocTRSubprocess", { enumerable: true, get: function () { return ocr_1.DocTRSubprocess; } });
108
+ Object.defineProperty(exports, "EasyOCRSubprocess", { enumerable: true, get: function () { return ocr_1.EasyOCRSubprocess; } });
109
+ var image_ingest_1 = require("./image-ingest");
110
+ Object.defineProperty(exports, "ImageIngestor", { enumerable: true, get: function () { return image_ingest_1.ImageIngestor; } });
111
+ Object.defineProperty(exports, "ingestImages", { enumerable: true, get: function () { return image_ingest_1.ingestImages; } });
112
+ Object.defineProperty(exports, "ingestDirectory", { enumerable: true, get: function () { return image_ingest_1.ingestDirectory; } });
94
113
  let globalConfig = {};
95
114
  /**
96
115
  * Configure global defaults for the Memvid SDK.
@@ -140,6 +159,147 @@ function getConfig() {
140
159
  */
141
160
  function resetConfig() {
142
161
  globalConfig = {};
162
+ aclScopeCache.clear();
163
+ }
164
+ const DEFAULT_ACL_SCOPE = {
165
+ tenantId: null,
166
+ subjectId: null,
167
+ roles: [],
168
+ groupIds: [],
169
+ enforcementMode: "audit",
170
+ };
171
+ const aclScopeCache = new Map();
172
+ function resolveDashboardUrl(override) {
173
+ return (override ||
174
+ globalConfig.dashboardUrl ||
175
+ process.env.MEMVID_DASHBOARD_URL ||
176
+ "https://memvid.com").replace(/\/$/, "");
177
+ }
178
+ function normalizeAclString(value) {
179
+ if (typeof value !== "string") {
180
+ return null;
181
+ }
182
+ const trimmed = value.trim();
183
+ if (!trimmed) {
184
+ return null;
185
+ }
186
+ return trimmed.toLowerCase();
187
+ }
188
+ function normalizeAclStringList(value) {
189
+ if (!Array.isArray(value)) {
190
+ return [];
191
+ }
192
+ const unique = new Set();
193
+ for (const item of value) {
194
+ const normalized = normalizeAclString(item);
195
+ if (normalized) {
196
+ unique.add(normalized);
197
+ }
198
+ }
199
+ return Array.from(unique).sort();
200
+ }
201
+ function parseTicketEnvelope(payload) {
202
+ if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
203
+ return null;
204
+ }
205
+ const root = payload;
206
+ const data = root.data;
207
+ if (data && typeof data === "object" && !Array.isArray(data)) {
208
+ return data;
209
+ }
210
+ return root;
211
+ }
212
+ function normalizeApiKeyAclScopePayload(value) {
213
+ if (!value || typeof value !== "object" || Array.isArray(value)) {
214
+ return { ...DEFAULT_ACL_SCOPE };
215
+ }
216
+ const source = value;
217
+ return {
218
+ tenantId: normalizeAclString(source.tenant_id ?? source.tenantId),
219
+ subjectId: normalizeAclString(source.subject_id ?? source.subjectId),
220
+ roles: normalizeAclStringList(source.roles),
221
+ groupIds: normalizeAclStringList(source.group_ids ?? source.groupIds),
222
+ enforcementMode: source.enforcement_mode === "enforce" || source.enforcementMode === "enforce"
223
+ ? "enforce"
224
+ : "audit",
225
+ };
226
+ }
227
+ /**
228
+ * Fetch ACL scope attached to the current Memvid API key.
229
+ *
230
+ * This reads `/api/ticket` from the dashboard control plane and returns
231
+ * the normalized ACL scope that can be reused for ingestion and retrieval.
232
+ */
233
+ async function getAclScopeFromApiKey(options) {
234
+ const apiKey = (options?.apiKey || globalConfig.apiKey || process.env.MEMVID_API_KEY || "").trim();
235
+ if (!apiKey) {
236
+ throw new error_1.ApiKeyRequiredError("API key required for getAclScopeFromApiKey(). Set configure({ apiKey: 'mv2_...' }) or MEMVID_API_KEY.");
237
+ }
238
+ if (!isMemvidApiKey(apiKey)) {
239
+ throw new error_1.ApiKeyRequiredError("Invalid API key format (expected mv2_*).");
240
+ }
241
+ const dashboardUrl = resolveDashboardUrl(options?.dashboardUrl);
242
+ const cacheKey = `${dashboardUrl}|${apiKey}`;
243
+ if (!options?.forceRefresh && aclScopeCache.has(cacheKey)) {
244
+ return { ...aclScopeCache.get(cacheKey) };
245
+ }
246
+ const timeoutMs = options?.timeoutMs && options.timeoutMs > 0 ? options.timeoutMs : 5000;
247
+ const response = await fetch(`${dashboardUrl}/api/ticket`, {
248
+ method: "GET",
249
+ headers: { "x-api-key": apiKey },
250
+ signal: AbortSignal.timeout(timeoutMs),
251
+ });
252
+ if (!response.ok) {
253
+ const body = await response.text().catch(() => "");
254
+ const message = response.status === 401
255
+ ? "Invalid API key. Get a valid key at https://memvid.com/dashboard/api-keys"
256
+ : `Failed to fetch ACL scope from ${dashboardUrl}/api/ticket (${response.status})`;
257
+ throw new error_1.MemvidError("MV022", message, body ? { body } : undefined);
258
+ }
259
+ const raw = await response.json().catch(() => ({}));
260
+ const ticket = parseTicketEnvelope(raw);
261
+ const scope = normalizeApiKeyAclScopePayload(ticket?.acl_scope);
262
+ aclScopeCache.set(cacheKey, scope);
263
+ return { ...scope };
264
+ }
265
+ /**
266
+ * Convert API-key ACL scope into query ACL context.
267
+ */
268
+ function aclContextFromScope(scope) {
269
+ if (!scope?.tenantId) {
270
+ return undefined;
271
+ }
272
+ const context = { tenantId: scope.tenantId };
273
+ if (scope.subjectId)
274
+ context.subjectId = scope.subjectId;
275
+ if (scope.roles.length > 0)
276
+ context.roles = [...scope.roles];
277
+ if (scope.groupIds.length > 0)
278
+ context.groupIds = [...scope.groupIds];
279
+ return context;
280
+ }
281
+ /**
282
+ * Build ACL metadata for ingest from an API-key ACL scope.
283
+ */
284
+ function aclMetadataFromScope(scope, options) {
285
+ if (!scope?.tenantId) {
286
+ throw new error_1.MemvidError("MV999", "ACL scope is missing tenantId. Create an ACL-scoped API key (tenant required) before ingesting restricted data.");
287
+ }
288
+ const mode = options?.visibility ?? "auto";
289
+ const hasReaders = !!scope.subjectId || scope.roles.length > 0 || scope.groupIds.length > 0;
290
+ const visibility = mode === "auto" ? (hasReaders ? "restricted" : "public") : mode;
291
+ const metadata = {
292
+ acl_tenant_id: scope.tenantId,
293
+ acl_visibility: visibility,
294
+ acl_policy_version: options?.policyVersion || "v1",
295
+ };
296
+ if (scope.subjectId)
297
+ metadata.acl_read_principals = [scope.subjectId];
298
+ if (scope.roles.length > 0)
299
+ metadata.acl_read_roles = [...scope.roles];
300
+ if (scope.groupIds.length > 0)
301
+ metadata.acl_read_groups = [...scope.groupIds];
302
+ return metadata;
143
303
  }
144
304
  /**
145
305
  * Validate the current configuration by testing API connectivity.
@@ -845,13 +1005,17 @@ function normaliseFindOptions(opts) {
845
1005
  cursor: opts.cursor,
846
1006
  asOfFrame: opts.asOfFrame,
847
1007
  asOfTs: opts.asOfTs,
1008
+ aclContext: opts.aclContext,
1009
+ aclEnforcementMode: opts.aclEnforcementMode,
848
1010
  };
849
1011
  if (payload.k == null &&
850
1012
  payload.snippetChars == null &&
851
1013
  payload.scope == null &&
852
1014
  payload.cursor == null &&
853
1015
  payload.asOfFrame == null &&
854
- payload.asOfTs == null) {
1016
+ payload.asOfTs == null &&
1017
+ payload.aclContext == null &&
1018
+ payload.aclEnforcementMode == null) {
855
1019
  return undefined;
856
1020
  }
857
1021
  return payload;
@@ -868,6 +1032,8 @@ function normaliseVecSearchOptions(opts) {
868
1032
  minRelevancy: opts.minRelevancy,
869
1033
  maxK: opts.maxK,
870
1034
  adaptiveStrategy: opts.adaptiveStrategy,
1035
+ aclContext: opts.aclContext,
1036
+ aclEnforcementMode: opts.aclEnforcementMode,
871
1037
  };
872
1038
  if (payload.k == null &&
873
1039
  payload.snippetChars == null &&
@@ -875,7 +1041,9 @@ function normaliseVecSearchOptions(opts) {
875
1041
  payload.adaptive == null &&
876
1042
  payload.minRelevancy == null &&
877
1043
  payload.maxK == null &&
878
- payload.adaptiveStrategy == null) {
1044
+ payload.adaptiveStrategy == null &&
1045
+ payload.aclContext == null &&
1046
+ payload.aclEnforcementMode == null) {
879
1047
  return undefined;
880
1048
  }
881
1049
  return payload;
@@ -903,6 +1071,8 @@ function normaliseAskOptions(opts) {
903
1071
  maxK: opts.maxK,
904
1072
  adaptiveStrategy: opts.adaptiveStrategy,
905
1073
  showChunks: opts.showChunks,
1074
+ aclContext: opts.aclContext,
1075
+ aclEnforcementMode: opts.aclEnforcementMode,
906
1076
  };
907
1077
  if (payload.k == null &&
908
1078
  payload.mode == null &&
@@ -921,7 +1091,9 @@ function normaliseAskOptions(opts) {
921
1091
  payload.minRelevancy == null &&
922
1092
  payload.maxK == null &&
923
1093
  payload.adaptiveStrategy == null &&
924
- payload.showChunks == null) {
1094
+ payload.showChunks == null &&
1095
+ payload.aclContext == null &&
1096
+ payload.aclEnforcementMode == null) {
925
1097
  return undefined;
926
1098
  }
927
1099
  return payload;
package/dist/ocr.d.ts ADDED
@@ -0,0 +1,302 @@
1
+ /**
2
+ * OCR provider support for Memvid SDK (Node.js).
3
+ *
4
+ * This module provides classes for extracting text from images using OCR engines.
5
+ * Since docTR (best accuracy) is Python-native, Node.js has several options, including:
6
+ * 1. TesseractOCR - Built-in via tesseract.js, no external deps
7
+ * 2. DocTRSubprocess - Spawns Python for highest accuracy (85.3%)
8
+ *
9
+ * OCR Accuracy Comparison (tested on construction drawings):
10
+ * - docTR (Python): 85.3% - BEST
11
+ * - EasyOCR (Python): 79.4%
12
+ * - Tesseract.js: ~50-60% (lower than Python Tesseract)
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * import { TesseractOCR, DocTRSubprocess, getOCRProvider } from '@memvid/sdk';
17
+ *
18
+ * // Built-in Tesseract.js (no external deps)
19
+ * const ocr = new TesseractOCR();
20
+ * const result = await ocr.extractText('drawing.png');
21
+ * console.log(result.text);
22
+ *
23
+ * // For highest accuracy, use docTR via Python subprocess
24
+ * const doctrOcr = new DocTRSubprocess();
25
+ * const result2 = await doctrOcr.extractText('drawing.png');
26
+ * ```
27
+ */
28
+ /**
29
+ * Result from OCR text extraction.
30
+ */
31
+ export interface OCRResult {
32
+ /** Full extracted text */
33
+ text: string;
34
+ /** Overall confidence score (0-1) */
35
+ confidence: number;
36
+ /** Word/region-level results with bounding boxes */
37
+ regions: OCRRegion[];
38
+ /** Provider-specific metadata */
39
+ metadata: Record<string, unknown>;
40
+ }
41
+ /**
42
+ * A detected text region with bounding box.
43
+ */
44
+ export interface OCRRegion {
45
+ /** Extracted text for this region */
46
+ text: string;
47
+ /** Confidence score for this region (0-1) */
48
+ confidence: number;
49
+ /** Bounding box coordinates */
50
+ bbox: {
51
+ x: number;
52
+ y: number;
53
+ w: number;
54
+ h: number;
55
+ };
56
+ }
57
+ /**
58
+ * Abstract interface for OCR providers.
59
+ */
60
+ export interface OCRProvider {
61
+ /** Provider name (e.g., 'tesseract', 'doctr') */
62
+ readonly name: string;
63
+ /**
64
+ * Extract text from an image file.
65
+ * @param imagePath - Path to the image file
66
+ * @returns Promise resolving to OCR result
67
+ */
68
+ extractText(imagePath: string): Promise<OCRResult>;
69
+ /**
70
+ * Extract text from multiple images.
71
+ * Default implementation is sequential; override for batch processing.
72
+ * @param imagePaths - Array of image file paths
73
+ * @returns Promise resolving to array of OCR results
74
+ */
75
+ extractTextBatch?(imagePaths: string[]): Promise<OCRResult[]>;
76
+ /**
77
+ * Clean up resources (e.g., terminate workers).
78
+ */
79
+ terminate?(): Promise<void>;
80
+ }
81
+ /**
82
+ * Tesseract.js configuration options.
83
+ */
84
+ export interface TesseractConfig {
85
+ /** Language code (default: 'eng') */
86
+ lang?: string;
87
+ /** Path to trained data (optional) */
88
+ dataPath?: string;
89
+ /** Number of worker threads (default: 1) */
90
+ workerCount?: number;
91
+ }
92
+ /**
93
+ * Tesseract.js OCR provider.
94
+ *
95
+ * Built-in OCR using tesseract.js - no external dependencies required.
96
+ * Lower accuracy than docTR (~50-60% vs 85.3%) but easy to set up.
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * const ocr = new TesseractOCR();
101
+ * const result = await ocr.extractText('construction_drawing.png');
102
+ * console.log(result.text);
103
+ * console.log(`Confidence: ${(result.confidence * 100).toFixed(1)}%`);
104
+ * await ocr.terminate(); // Clean up worker
105
+ * ```
106
+ */
107
+ export declare class TesseractOCR implements OCRProvider {
108
+ private _lang;
109
+ private _dataPath?;
110
+ private _worker;
111
+ private _workerCount;
112
+ private _tesseract;
113
+ constructor(config?: TesseractConfig);
114
+ get name(): string;
115
+ private getTesseract;
116
+ private getWorker;
117
+ extractText(imagePath: string): Promise<OCRResult>;
118
+ extractTextBatch(imagePaths: string[]): Promise<OCRResult[]>;
119
+ terminate(): Promise<void>;
120
+ }
121
+ /**
122
+ * docTR via Python subprocess configuration.
123
+ */
124
+ export interface DocTRConfig {
125
+ /** Path to Python executable (default: 'python3') */
126
+ pythonPath?: string;
127
+ /** Detection architecture (default: 'db_resnet50') */
128
+ detArch?: string;
129
+ /** Recognition architecture (default: 'crnn_vgg16_bn') */
130
+ recoArch?: string;
131
+ /** Timeout in milliseconds (default: 60000) */
132
+ timeout?: number;
133
+ }
134
+ /**
135
+ * docTR OCR via Python subprocess.
136
+ *
137
+ * Highest accuracy OCR (85.3% on construction drawings) using docTR.
138
+ * Requires Python with docTR installed: `pip install python-doctr[torch]`
139
+ *
140
+ * This spawns a Python subprocess for each extraction, which adds overhead
141
+ * but provides the best accuracy for technical documents.
142
+ *
143
+ * @example
144
+ * ```typescript
145
+ * const ocr = new DocTRSubprocess();
146
+ * const result = await ocr.extractText('construction_drawing.png');
147
+ * console.log(result.text);
148
+ * console.log(`Confidence: ${(result.confidence * 100).toFixed(1)}%`);
149
+ * ```
150
+ */
151
+ export declare class DocTRSubprocess implements OCRProvider {
152
+ private _pythonPath;
153
+ private _detArch;
154
+ private _recoArch;
155
+ private _timeout;
156
+ constructor(config?: DocTRConfig);
157
+ get name(): string;
158
+ extractText(imagePath: string): Promise<OCRResult>;
159
+ extractTextBatch(imagePaths: string[]): Promise<OCRResult[]>;
160
+ }
161
+ /**
162
+ * EasyOCR via Python subprocess configuration.
163
+ */
164
+ export interface EasyOCRConfig {
165
+ /** Path to Python executable (default: 'python3') */
166
+ pythonPath?: string;
167
+ /** Languages to recognize (default: ['en']) */
168
+ languages?: string[];
169
+ /** Use GPU if available (default: false) */
170
+ gpu?: boolean;
171
+ /** Timeout in milliseconds (default: 60000) */
172
+ timeout?: number;
173
+ }
174
+ /**
175
+ * EasyOCR via Python subprocess.
176
+ *
177
+ * Good fallback OCR (79.4% on construction drawings) using EasyOCR.
178
+ * Requires Python with EasyOCR installed: `pip install easyocr`
179
+ *
180
+ * @example
181
+ * ```typescript
182
+ * const ocr = new EasyOCRSubprocess();
183
+ * const result = await ocr.extractText('construction_drawing.png');
184
+ * console.log(result.text);
185
+ * ```
186
+ */
187
+ export declare class EasyOCRSubprocess implements OCRProvider {
188
+ private _pythonPath;
189
+ private _languages;
190
+ private _gpu;
191
+ private _timeout;
192
+ constructor(config?: EasyOCRConfig);
193
+ get name(): string;
194
+ extractText(imagePath: string): Promise<OCRResult>;
195
+ }
196
+ /**
197
+ * PaddleOCR configuration options.
198
+ */
199
+ export interface PaddleOCRConfig {
200
+ /** Apply high contrast preprocessing (default: true, improves accuracy by ~25%) */
201
+ preprocessing?: boolean;
202
+ /** Contrast multiplier for preprocessing (default: 1.5) */
203
+ contrast?: number;
204
+ }
205
+ /**
206
+ * PaddleOCR via @gutenye/ocr-node (ONNX runtime).
207
+ *
208
+ * Fast and accurate OCR using PaddleOCR PP-OCRv4 model.
209
+ * Runs natively in Node.js via ONNX - no Python required.
210
+ *
211
+ * Requires: npm install @gutenye/ocr-node
212
+ *
213
+ * Performance (on construction drawings):
214
+ * - Speed: ~0.35s
215
+ * - Accuracy: ~75% with preprocessing (vs 50% without)
216
+ *
217
+ * @example
218
+ * ```typescript
219
+ * const ocr = new PaddleOCR();
220
+ * const result = await ocr.extractText('drawing.png');
221
+ * console.log(result.text);
222
+ * ```
223
+ */
224
+ export declare class PaddleOCR implements OCRProvider {
225
+ private _ocr;
226
+ private _sharp;
227
+ private _preprocessing;
228
+ private _contrast;
229
+ constructor(config?: PaddleOCRConfig);
230
+ get name(): string;
231
+ private getOcr;
232
+ private getSharp;
233
+ private preprocessImage;
234
+ extractText(imagePath: string): Promise<OCRResult>;
235
+ extractTextBatch(imagePaths: string[]): Promise<OCRResult[]>;
236
+ }
237
+ /**
238
+ * Ensemble OCR configuration options.
239
+ */
240
+ export interface EnsembleOCRConfig {
241
+ /** Contrast multiplier for preprocessing (default: 1.5) */
242
+ contrast?: number;
243
+ /** Enable sharpening (default: true) */
244
+ sharpen?: boolean;
245
+ }
246
+ /**
247
+ * Ensemble OCR - combines PaddleOCR + Tesseract for maximum accuracy.
248
+ *
249
+ * Achieves ~100% accuracy by running multiple OCR engines in parallel
250
+ * with different preprocessing, then merging results.
251
+ *
252
+ * Performance:
253
+ * - Accuracy: ~100% (vs 75% for PaddleOCR alone)
254
+ * - Speed: ~900ms (parallel execution)
255
+ *
256
+ * Requires: npm install @gutenye/ocr-node tesseract.js
257
+ *
258
+ * @example
259
+ * ```typescript
260
+ * const ocr = new EnsembleOCR();
261
+ * const result = await ocr.extractText('drawing.png');
262
+ * console.log(result.text); // Maximum accuracy
263
+ * ```
264
+ */
265
+ export declare class EnsembleOCR implements OCRProvider {
266
+ private _paddleOcr;
267
+ private _tessWorker;
268
+ private _sharp;
269
+ private _contrast;
270
+ private _sharpen;
271
+ constructor(config?: EnsembleOCRConfig);
272
+ get name(): string;
273
+ private getPaddleOcr;
274
+ private getTessWorker;
275
+ private getSharp;
276
+ extractText(imagePath: string): Promise<OCRResult>;
277
+ terminate(): Promise<void>;
278
+ }
279
+ /**
280
+ * OCR provider type for factory function.
281
+ */
282
+ export type OCRProviderType = 'ensemble' | 'paddle' | 'tesseract' | 'doctr' | 'easyocr';
283
+ /**
284
+ * Factory function to create an OCR provider.
285
+ *
286
+ * @param provider - Provider type: 'ensemble', 'paddle', 'tesseract', 'doctr', or 'easyocr'
287
+ * @param config - Provider-specific configuration
288
+ * @returns OCR provider instance
289
+ *
290
+ * @example
291
+ * ```typescript
292
+ * // PaddleOCR via ONNX (fast, recommended for Node.js)
293
+ * const ocr1 = getOCRProvider('paddle');
294
+ *
295
+ * // Tesseract.js (fallback)
296
+ * const ocr2 = getOCRProvider('tesseract');
297
+ *
298
+ * // docTR via Python (best accuracy: 85%)
299
+ * const ocr3 = getOCRProvider('doctr');
300
+ * ```
301
+ */
302
+ export declare function getOCRProvider(provider?: OCRProviderType, config?: TesseractConfig | DocTRConfig | EasyOCRConfig | PaddleOCRConfig | EnsembleOCRConfig): OCRProvider;