@memvid/sdk 2.0.155 → 2.0.157
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/documents/index.d.ts +2 -1
- package/dist/documents/index.js +2 -1
- package/dist/documents/xlsx.d.ts +77 -1
- package/dist/documents/xlsx.js +60 -1
- package/dist/image-ingest.d.ts +250 -0
- package/dist/image-ingest.js +411 -0
- package/dist/index.d.ts +23 -2
- package/dist/index.js +176 -4
- package/dist/ocr.d.ts +302 -0
- package/dist/ocr.js +778 -0
- package/dist/types.d.ts +36 -0
- package/package.json +8 -8
package/dist/index.js
CHANGED
|
@@ -36,10 +36,14 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
36
36
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
37
37
|
};
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
-
exports.
|
|
39
|
+
exports.getEmbedder = exports.OllamaEmbeddings = exports.MistralEmbeddings = exports.GeminiEmbeddings = exports.NvidiaEmbeddings = exports.VoyageEmbeddings = exports.CohereEmbeddings = exports.OpenAIEmbeddings = exports.getErrorSuggestion = exports.QuotaExceededError = exports.EncryptionError = exports.EmbeddingFailedError = exports.VecDimensionMismatchError = exports.CorruptFileError = exports.VecIndexDisabledError = exports.FrameNotFoundError = exports.MemoryAlreadyBoundError = exports.FileNotFoundError = exports.ApiKeyRequiredError = exports.LockedError = exports.VerificationFailedError = exports.TimeIndexMissingError = exports.LexIndexDisabledError = exports.TicketReplayError = exports.TicketInvalidError = exports.CapacityExceededError = exports.MemvidError = exports.use = exports.ingestDirectory = exports.ingestImages = exports.ImageIngestor = exports.EasyOCRSubprocess = exports.DocTRSubprocess = exports.TesseractOCR = exports.PaddleOCR = exports.EnsembleOCR = exports.getOCRProvider = exports.GeminiEntities = exports.ClaudeEntities = exports.OpenAIEntities = exports.LocalNER = exports.getEntityExtractor = exports.GeminiClip = exports.OpenAIClip = exports.LocalClip = exports.getClipProvider = exports.imageIngest = exports.ocr = exports.entities = exports.clip = void 0;
|
|
40
|
+
exports.isTelemetryEnabled = exports.flushAnalytics = exports.LOCAL_EMBEDDING_MODELS = exports.MODEL_DIMENSIONS = void 0;
|
|
40
41
|
exports.configure = configure;
|
|
41
42
|
exports.getConfig = getConfig;
|
|
42
43
|
exports.resetConfig = resetConfig;
|
|
44
|
+
exports.getAclScopeFromApiKey = getAclScopeFromApiKey;
|
|
45
|
+
exports.aclContextFromScope = aclContextFromScope;
|
|
46
|
+
exports.aclMetadataFromScope = aclMetadataFromScope;
|
|
43
47
|
exports.validateConfig = validateConfig;
|
|
44
48
|
exports.resolveMemory = resolveMemory;
|
|
45
49
|
exports.createMemory = createMemory;
|
|
@@ -79,6 +83,9 @@ require("./adapters/mcp");
|
|
|
79
83
|
exports.clip = __importStar(require("./clip"));
|
|
80
84
|
exports.entities = __importStar(require("./entities"));
|
|
81
85
|
__exportStar(require("./embeddings"), exports);
|
|
86
|
+
// Export OCR and image ingestion modules
|
|
87
|
+
exports.ocr = __importStar(require("./ocr"));
|
|
88
|
+
exports.imageIngest = __importStar(require("./image-ingest"));
|
|
82
89
|
// Re-export provider factories for convenience
|
|
83
90
|
var clip_1 = require("./clip");
|
|
84
91
|
Object.defineProperty(exports, "getClipProvider", { enumerable: true, get: function () { return clip_1.getClipProvider; } });
|
|
@@ -91,6 +98,18 @@ Object.defineProperty(exports, "LocalNER", { enumerable: true, get: function ()
|
|
|
91
98
|
Object.defineProperty(exports, "OpenAIEntities", { enumerable: true, get: function () { return entities_1.OpenAIEntities; } });
|
|
92
99
|
Object.defineProperty(exports, "ClaudeEntities", { enumerable: true, get: function () { return entities_1.ClaudeEntities; } });
|
|
93
100
|
Object.defineProperty(exports, "GeminiEntities", { enumerable: true, get: function () { return entities_1.GeminiEntities; } });
|
|
101
|
+
// Re-export OCR providers and image ingestion
|
|
102
|
+
var ocr_1 = require("./ocr");
|
|
103
|
+
Object.defineProperty(exports, "getOCRProvider", { enumerable: true, get: function () { return ocr_1.getOCRProvider; } });
|
|
104
|
+
Object.defineProperty(exports, "EnsembleOCR", { enumerable: true, get: function () { return ocr_1.EnsembleOCR; } });
|
|
105
|
+
Object.defineProperty(exports, "PaddleOCR", { enumerable: true, get: function () { return ocr_1.PaddleOCR; } });
|
|
106
|
+
Object.defineProperty(exports, "TesseractOCR", { enumerable: true, get: function () { return ocr_1.TesseractOCR; } });
|
|
107
|
+
Object.defineProperty(exports, "DocTRSubprocess", { enumerable: true, get: function () { return ocr_1.DocTRSubprocess; } });
|
|
108
|
+
Object.defineProperty(exports, "EasyOCRSubprocess", { enumerable: true, get: function () { return ocr_1.EasyOCRSubprocess; } });
|
|
109
|
+
var image_ingest_1 = require("./image-ingest");
|
|
110
|
+
Object.defineProperty(exports, "ImageIngestor", { enumerable: true, get: function () { return image_ingest_1.ImageIngestor; } });
|
|
111
|
+
Object.defineProperty(exports, "ingestImages", { enumerable: true, get: function () { return image_ingest_1.ingestImages; } });
|
|
112
|
+
Object.defineProperty(exports, "ingestDirectory", { enumerable: true, get: function () { return image_ingest_1.ingestDirectory; } });
|
|
94
113
|
let globalConfig = {};
|
|
95
114
|
/**
|
|
96
115
|
* Configure global defaults for the Memvid SDK.
|
|
@@ -140,6 +159,147 @@ function getConfig() {
|
|
|
140
159
|
*/
|
|
141
160
|
function resetConfig() {
|
|
142
161
|
globalConfig = {};
|
|
162
|
+
aclScopeCache.clear();
|
|
163
|
+
}
|
|
164
|
+
const DEFAULT_ACL_SCOPE = {
|
|
165
|
+
tenantId: null,
|
|
166
|
+
subjectId: null,
|
|
167
|
+
roles: [],
|
|
168
|
+
groupIds: [],
|
|
169
|
+
enforcementMode: "audit",
|
|
170
|
+
};
|
|
171
|
+
const aclScopeCache = new Map();
|
|
172
|
+
function resolveDashboardUrl(override) {
|
|
173
|
+
return (override ||
|
|
174
|
+
globalConfig.dashboardUrl ||
|
|
175
|
+
process.env.MEMVID_DASHBOARD_URL ||
|
|
176
|
+
"https://memvid.com").replace(/\/$/, "");
|
|
177
|
+
}
|
|
178
|
+
function normalizeAclString(value) {
|
|
179
|
+
if (typeof value !== "string") {
|
|
180
|
+
return null;
|
|
181
|
+
}
|
|
182
|
+
const trimmed = value.trim();
|
|
183
|
+
if (!trimmed) {
|
|
184
|
+
return null;
|
|
185
|
+
}
|
|
186
|
+
return trimmed.toLowerCase();
|
|
187
|
+
}
|
|
188
|
+
function normalizeAclStringList(value) {
|
|
189
|
+
if (!Array.isArray(value)) {
|
|
190
|
+
return [];
|
|
191
|
+
}
|
|
192
|
+
const unique = new Set();
|
|
193
|
+
for (const item of value) {
|
|
194
|
+
const normalized = normalizeAclString(item);
|
|
195
|
+
if (normalized) {
|
|
196
|
+
unique.add(normalized);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
return Array.from(unique).sort();
|
|
200
|
+
}
|
|
201
|
+
function parseTicketEnvelope(payload) {
|
|
202
|
+
if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
|
|
203
|
+
return null;
|
|
204
|
+
}
|
|
205
|
+
const root = payload;
|
|
206
|
+
const data = root.data;
|
|
207
|
+
if (data && typeof data === "object" && !Array.isArray(data)) {
|
|
208
|
+
return data;
|
|
209
|
+
}
|
|
210
|
+
return root;
|
|
211
|
+
}
|
|
212
|
+
function normalizeApiKeyAclScopePayload(value) {
|
|
213
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
214
|
+
return { ...DEFAULT_ACL_SCOPE };
|
|
215
|
+
}
|
|
216
|
+
const source = value;
|
|
217
|
+
return {
|
|
218
|
+
tenantId: normalizeAclString(source.tenant_id ?? source.tenantId),
|
|
219
|
+
subjectId: normalizeAclString(source.subject_id ?? source.subjectId),
|
|
220
|
+
roles: normalizeAclStringList(source.roles),
|
|
221
|
+
groupIds: normalizeAclStringList(source.group_ids ?? source.groupIds),
|
|
222
|
+
enforcementMode: source.enforcement_mode === "enforce" || source.enforcementMode === "enforce"
|
|
223
|
+
? "enforce"
|
|
224
|
+
: "audit",
|
|
225
|
+
};
|
|
226
|
+
}
|
|
227
|
+
/**
|
|
228
|
+
* Fetch ACL scope attached to the current Memvid API key.
|
|
229
|
+
*
|
|
230
|
+
* This reads `/api/ticket` from the dashboard control plane and returns
|
|
231
|
+
* the normalized ACL scope that can be reused for ingestion and retrieval.
|
|
232
|
+
*/
|
|
233
|
+
async function getAclScopeFromApiKey(options) {
|
|
234
|
+
const apiKey = (options?.apiKey || globalConfig.apiKey || process.env.MEMVID_API_KEY || "").trim();
|
|
235
|
+
if (!apiKey) {
|
|
236
|
+
throw new error_1.ApiKeyRequiredError("API key required for getAclScopeFromApiKey(). Set configure({ apiKey: 'mv2_...' }) or MEMVID_API_KEY.");
|
|
237
|
+
}
|
|
238
|
+
if (!isMemvidApiKey(apiKey)) {
|
|
239
|
+
throw new error_1.ApiKeyRequiredError("Invalid API key format (expected mv2_*).");
|
|
240
|
+
}
|
|
241
|
+
const dashboardUrl = resolveDashboardUrl(options?.dashboardUrl);
|
|
242
|
+
const cacheKey = `${dashboardUrl}|${apiKey}`;
|
|
243
|
+
if (!options?.forceRefresh && aclScopeCache.has(cacheKey)) {
|
|
244
|
+
return { ...aclScopeCache.get(cacheKey) };
|
|
245
|
+
}
|
|
246
|
+
const timeoutMs = options?.timeoutMs && options.timeoutMs > 0 ? options.timeoutMs : 5000;
|
|
247
|
+
const response = await fetch(`${dashboardUrl}/api/ticket`, {
|
|
248
|
+
method: "GET",
|
|
249
|
+
headers: { "x-api-key": apiKey },
|
|
250
|
+
signal: AbortSignal.timeout(timeoutMs),
|
|
251
|
+
});
|
|
252
|
+
if (!response.ok) {
|
|
253
|
+
const body = await response.text().catch(() => "");
|
|
254
|
+
const message = response.status === 401
|
|
255
|
+
? "Invalid API key. Get a valid key at https://memvid.com/dashboard/api-keys"
|
|
256
|
+
: `Failed to fetch ACL scope from ${dashboardUrl}/api/ticket (${response.status})`;
|
|
257
|
+
throw new error_1.MemvidError("MV022", message, body ? { body } : undefined);
|
|
258
|
+
}
|
|
259
|
+
const raw = await response.json().catch(() => ({}));
|
|
260
|
+
const ticket = parseTicketEnvelope(raw);
|
|
261
|
+
const scope = normalizeApiKeyAclScopePayload(ticket?.acl_scope);
|
|
262
|
+
aclScopeCache.set(cacheKey, scope);
|
|
263
|
+
return { ...scope };
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Convert API-key ACL scope into query ACL context.
|
|
267
|
+
*/
|
|
268
|
+
function aclContextFromScope(scope) {
|
|
269
|
+
if (!scope?.tenantId) {
|
|
270
|
+
return undefined;
|
|
271
|
+
}
|
|
272
|
+
const context = { tenantId: scope.tenantId };
|
|
273
|
+
if (scope.subjectId)
|
|
274
|
+
context.subjectId = scope.subjectId;
|
|
275
|
+
if (scope.roles.length > 0)
|
|
276
|
+
context.roles = [...scope.roles];
|
|
277
|
+
if (scope.groupIds.length > 0)
|
|
278
|
+
context.groupIds = [...scope.groupIds];
|
|
279
|
+
return context;
|
|
280
|
+
}
|
|
281
|
+
/**
|
|
282
|
+
* Build ACL metadata for ingest from an API-key ACL scope.
|
|
283
|
+
*/
|
|
284
|
+
function aclMetadataFromScope(scope, options) {
|
|
285
|
+
if (!scope?.tenantId) {
|
|
286
|
+
throw new error_1.MemvidError("MV999", "ACL scope is missing tenantId. Create an ACL-scoped API key (tenant required) before ingesting restricted data.");
|
|
287
|
+
}
|
|
288
|
+
const mode = options?.visibility ?? "auto";
|
|
289
|
+
const hasReaders = !!scope.subjectId || scope.roles.length > 0 || scope.groupIds.length > 0;
|
|
290
|
+
const visibility = mode === "auto" ? (hasReaders ? "restricted" : "public") : mode;
|
|
291
|
+
const metadata = {
|
|
292
|
+
acl_tenant_id: scope.tenantId,
|
|
293
|
+
acl_visibility: visibility,
|
|
294
|
+
acl_policy_version: options?.policyVersion || "v1",
|
|
295
|
+
};
|
|
296
|
+
if (scope.subjectId)
|
|
297
|
+
metadata.acl_read_principals = [scope.subjectId];
|
|
298
|
+
if (scope.roles.length > 0)
|
|
299
|
+
metadata.acl_read_roles = [...scope.roles];
|
|
300
|
+
if (scope.groupIds.length > 0)
|
|
301
|
+
metadata.acl_read_groups = [...scope.groupIds];
|
|
302
|
+
return metadata;
|
|
143
303
|
}
|
|
144
304
|
/**
|
|
145
305
|
* Validate the current configuration by testing API connectivity.
|
|
@@ -845,13 +1005,17 @@ function normaliseFindOptions(opts) {
|
|
|
845
1005
|
cursor: opts.cursor,
|
|
846
1006
|
asOfFrame: opts.asOfFrame,
|
|
847
1007
|
asOfTs: opts.asOfTs,
|
|
1008
|
+
aclContext: opts.aclContext,
|
|
1009
|
+
aclEnforcementMode: opts.aclEnforcementMode,
|
|
848
1010
|
};
|
|
849
1011
|
if (payload.k == null &&
|
|
850
1012
|
payload.snippetChars == null &&
|
|
851
1013
|
payload.scope == null &&
|
|
852
1014
|
payload.cursor == null &&
|
|
853
1015
|
payload.asOfFrame == null &&
|
|
854
|
-
payload.asOfTs == null
|
|
1016
|
+
payload.asOfTs == null &&
|
|
1017
|
+
payload.aclContext == null &&
|
|
1018
|
+
payload.aclEnforcementMode == null) {
|
|
855
1019
|
return undefined;
|
|
856
1020
|
}
|
|
857
1021
|
return payload;
|
|
@@ -868,6 +1032,8 @@ function normaliseVecSearchOptions(opts) {
|
|
|
868
1032
|
minRelevancy: opts.minRelevancy,
|
|
869
1033
|
maxK: opts.maxK,
|
|
870
1034
|
adaptiveStrategy: opts.adaptiveStrategy,
|
|
1035
|
+
aclContext: opts.aclContext,
|
|
1036
|
+
aclEnforcementMode: opts.aclEnforcementMode,
|
|
871
1037
|
};
|
|
872
1038
|
if (payload.k == null &&
|
|
873
1039
|
payload.snippetChars == null &&
|
|
@@ -875,7 +1041,9 @@ function normaliseVecSearchOptions(opts) {
|
|
|
875
1041
|
payload.adaptive == null &&
|
|
876
1042
|
payload.minRelevancy == null &&
|
|
877
1043
|
payload.maxK == null &&
|
|
878
|
-
payload.adaptiveStrategy == null
|
|
1044
|
+
payload.adaptiveStrategy == null &&
|
|
1045
|
+
payload.aclContext == null &&
|
|
1046
|
+
payload.aclEnforcementMode == null) {
|
|
879
1047
|
return undefined;
|
|
880
1048
|
}
|
|
881
1049
|
return payload;
|
|
@@ -903,6 +1071,8 @@ function normaliseAskOptions(opts) {
|
|
|
903
1071
|
maxK: opts.maxK,
|
|
904
1072
|
adaptiveStrategy: opts.adaptiveStrategy,
|
|
905
1073
|
showChunks: opts.showChunks,
|
|
1074
|
+
aclContext: opts.aclContext,
|
|
1075
|
+
aclEnforcementMode: opts.aclEnforcementMode,
|
|
906
1076
|
};
|
|
907
1077
|
if (payload.k == null &&
|
|
908
1078
|
payload.mode == null &&
|
|
@@ -921,7 +1091,9 @@ function normaliseAskOptions(opts) {
|
|
|
921
1091
|
payload.minRelevancy == null &&
|
|
922
1092
|
payload.maxK == null &&
|
|
923
1093
|
payload.adaptiveStrategy == null &&
|
|
924
|
-
payload.showChunks == null
|
|
1094
|
+
payload.showChunks == null &&
|
|
1095
|
+
payload.aclContext == null &&
|
|
1096
|
+
payload.aclEnforcementMode == null) {
|
|
925
1097
|
return undefined;
|
|
926
1098
|
}
|
|
927
1099
|
return payload;
|
package/dist/ocr.d.ts
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR provider support for Memvid SDK (Node.js).
|
|
3
|
+
*
|
|
4
|
+
* This module provides classes for extracting text from images using OCR engines.
|
|
5
|
+
* Since docTR (best accuracy) is Python-native, Node.js has two basic options (PaddleOCR, EasyOCRSubprocess and EnsembleOCR are also available, see below):
|
|
6
|
+
* 1. TesseractOCR - Built-in via tesseract.js, no external deps
|
|
7
|
+
* 2. DocTRSubprocess - Spawns Python for highest accuracy (85.3%)
|
|
8
|
+
*
|
|
9
|
+
* OCR Accuracy Comparison (tested on construction drawings):
|
|
10
|
+
* - docTR (Python): 85.3% - BEST
|
|
11
|
+
* - EasyOCR (Python): 79.4%
|
|
12
|
+
* - Tesseract.js: ~50-60% (lower than Python Tesseract)
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* import { TesseractOCR, DocTRSubprocess, getOCRProvider } from '@memvid/sdk';
|
|
17
|
+
*
|
|
18
|
+
* // Built-in Tesseract.js (no external deps)
|
|
19
|
+
* const ocr = new TesseractOCR();
|
|
20
|
+
* const result = await ocr.extractText('drawing.png');
|
|
21
|
+
* console.log(result.text);
|
|
22
|
+
*
|
|
23
|
+
* // For highest accuracy, use docTR via Python subprocess
|
|
24
|
+
* const doctrOcr = new DocTRSubprocess();
|
|
25
|
+
* const result2 = await doctrOcr.extractText('drawing.png');
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
/**
|
|
29
|
+
* Result from OCR text extraction.
|
|
30
|
+
*/
|
|
31
|
+
export interface OCRResult {
|
|
32
|
+
/** Full extracted text */
|
|
33
|
+
text: string;
|
|
34
|
+
/** Overall confidence score (0-1) */
|
|
35
|
+
confidence: number;
|
|
36
|
+
/** Word/region-level results with bounding boxes */
|
|
37
|
+
regions: OCRRegion[];
|
|
38
|
+
/** Provider-specific metadata */
|
|
39
|
+
metadata: Record<string, unknown>;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* A detected text region with bounding box.
|
|
43
|
+
*/
|
|
44
|
+
export interface OCRRegion {
|
|
45
|
+
/** Extracted text for this region */
|
|
46
|
+
text: string;
|
|
47
|
+
/** Confidence score for this region (0-1) */
|
|
48
|
+
confidence: number;
|
|
49
|
+
/** Bounding box coordinates */
|
|
50
|
+
bbox: {
|
|
51
|
+
x: number;
|
|
52
|
+
y: number;
|
|
53
|
+
w: number;
|
|
54
|
+
h: number;
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Abstract interface for OCR providers.
|
|
59
|
+
*/
|
|
60
|
+
export interface OCRProvider {
|
|
61
|
+
/** Provider name (e.g., 'tesseract', 'doctr') */
|
|
62
|
+
readonly name: string;
|
|
63
|
+
/**
|
|
64
|
+
* Extract text from an image file.
|
|
65
|
+
* @param imagePath - Path to the image file
|
|
66
|
+
* @returns Promise resolving to OCR result
|
|
67
|
+
*/
|
|
68
|
+
extractText(imagePath: string): Promise<OCRResult>;
|
|
69
|
+
/**
|
|
70
|
+
* Extract text from multiple images.
|
|
71
|
+
* Optional: providers may omit this method (the interface supplies no default), so check for it before calling.
|
|
72
|
+
* @param imagePaths - Array of image file paths
|
|
73
|
+
* @returns Promise resolving to array of OCR results
|
|
74
|
+
*/
|
|
75
|
+
extractTextBatch?(imagePaths: string[]): Promise<OCRResult[]>;
|
|
76
|
+
/**
|
|
77
|
+
* Clean up resources (e.g., terminate workers).
|
|
78
|
+
*/
|
|
79
|
+
terminate?(): Promise<void>;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Tesseract.js configuration options.
|
|
83
|
+
*/
|
|
84
|
+
export interface TesseractConfig {
|
|
85
|
+
/** Language code (default: 'eng') */
|
|
86
|
+
lang?: string;
|
|
87
|
+
/** Path to trained data (optional) */
|
|
88
|
+
dataPath?: string;
|
|
89
|
+
/** Number of worker threads (default: 1) */
|
|
90
|
+
workerCount?: number;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Tesseract.js OCR provider.
|
|
94
|
+
*
|
|
95
|
+
* Built-in OCR using tesseract.js - no external dependencies required.
|
|
96
|
+
* Lower accuracy than docTR (~50-60% vs 85.3%) but easy to set up.
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* ```typescript
|
|
100
|
+
* const ocr = new TesseractOCR();
|
|
101
|
+
* const result = await ocr.extractText('construction_drawing.png');
|
|
102
|
+
* console.log(result.text);
|
|
103
|
+
* console.log(`Confidence: ${(result.confidence * 100).toFixed(1)}%`);
|
|
104
|
+
* await ocr.terminate(); // Clean up worker
|
|
105
|
+
* ```
|
|
106
|
+
*/
|
|
107
|
+
export declare class TesseractOCR implements OCRProvider {
|
|
108
|
+
private _lang;
|
|
109
|
+
private _dataPath?;
|
|
110
|
+
private _worker;
|
|
111
|
+
private _workerCount;
|
|
112
|
+
private _tesseract;
|
|
113
|
+
constructor(config?: TesseractConfig);
|
|
114
|
+
get name(): string;
|
|
115
|
+
private getTesseract;
|
|
116
|
+
private getWorker;
|
|
117
|
+
extractText(imagePath: string): Promise<OCRResult>;
|
|
118
|
+
extractTextBatch(imagePaths: string[]): Promise<OCRResult[]>;
|
|
119
|
+
terminate(): Promise<void>;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* docTR via Python subprocess configuration.
|
|
123
|
+
*/
|
|
124
|
+
export interface DocTRConfig {
|
|
125
|
+
/** Path to Python executable (default: 'python3') */
|
|
126
|
+
pythonPath?: string;
|
|
127
|
+
/** Detection architecture (default: 'db_resnet50') */
|
|
128
|
+
detArch?: string;
|
|
129
|
+
/** Recognition architecture (default: 'crnn_vgg16_bn') */
|
|
130
|
+
recoArch?: string;
|
|
131
|
+
/** Timeout in milliseconds (default: 60000) */
|
|
132
|
+
timeout?: number;
|
|
133
|
+
}
|
|
134
|
+
/**
|
|
135
|
+
* docTR OCR via Python subprocess.
|
|
136
|
+
*
|
|
137
|
+
* Highest accuracy OCR (85.3% on construction drawings) using docTR.
|
|
138
|
+
* Requires Python with docTR installed: `pip install python-doctr[torch]`
|
|
139
|
+
*
|
|
140
|
+
* This spawns a Python subprocess for each extraction, which adds overhead
|
|
141
|
+
* but provides the best accuracy for technical documents.
|
|
142
|
+
*
|
|
143
|
+
* @example
|
|
144
|
+
* ```typescript
|
|
145
|
+
* const ocr = new DocTRSubprocess();
|
|
146
|
+
* const result = await ocr.extractText('construction_drawing.png');
|
|
147
|
+
* console.log(result.text);
|
|
148
|
+
* console.log(`Confidence: ${(result.confidence * 100).toFixed(1)}%`);
|
|
149
|
+
* ```
|
|
150
|
+
*/
|
|
151
|
+
export declare class DocTRSubprocess implements OCRProvider {
|
|
152
|
+
private _pythonPath;
|
|
153
|
+
private _detArch;
|
|
154
|
+
private _recoArch;
|
|
155
|
+
private _timeout;
|
|
156
|
+
constructor(config?: DocTRConfig);
|
|
157
|
+
get name(): string;
|
|
158
|
+
extractText(imagePath: string): Promise<OCRResult>;
|
|
159
|
+
extractTextBatch(imagePaths: string[]): Promise<OCRResult[]>;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* EasyOCR via Python subprocess configuration.
|
|
163
|
+
*/
|
|
164
|
+
export interface EasyOCRConfig {
|
|
165
|
+
/** Path to Python executable (default: 'python3') */
|
|
166
|
+
pythonPath?: string;
|
|
167
|
+
/** Languages to recognize (default: ['en']) */
|
|
168
|
+
languages?: string[];
|
|
169
|
+
/** Use GPU if available (default: false) */
|
|
170
|
+
gpu?: boolean;
|
|
171
|
+
/** Timeout in milliseconds (default: 60000) */
|
|
172
|
+
timeout?: number;
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* EasyOCR via Python subprocess.
|
|
176
|
+
*
|
|
177
|
+
* Good fallback OCR (79.4% on construction drawings) using EasyOCR.
|
|
178
|
+
* Requires Python with EasyOCR installed: `pip install easyocr`
|
|
179
|
+
*
|
|
180
|
+
* @example
|
|
181
|
+
* ```typescript
|
|
182
|
+
* const ocr = new EasyOCRSubprocess();
|
|
183
|
+
* const result = await ocr.extractText('construction_drawing.png');
|
|
184
|
+
* console.log(result.text);
|
|
185
|
+
* ```
|
|
186
|
+
*/
|
|
187
|
+
export declare class EasyOCRSubprocess implements OCRProvider {
|
|
188
|
+
private _pythonPath;
|
|
189
|
+
private _languages;
|
|
190
|
+
private _gpu;
|
|
191
|
+
private _timeout;
|
|
192
|
+
constructor(config?: EasyOCRConfig);
|
|
193
|
+
get name(): string;
|
|
194
|
+
extractText(imagePath: string): Promise<OCRResult>;
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* PaddleOCR configuration options.
|
|
198
|
+
*/
|
|
199
|
+
export interface PaddleOCRConfig {
|
|
200
|
+
/** Apply high contrast preprocessing (default: true, improves accuracy by ~25%) */
|
|
201
|
+
preprocessing?: boolean;
|
|
202
|
+
/** Contrast multiplier for preprocessing (default: 1.5) */
|
|
203
|
+
contrast?: number;
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* PaddleOCR via @gutenye/ocr-node (ONNX runtime).
|
|
207
|
+
*
|
|
208
|
+
* Fast and accurate OCR using PaddleOCR PP-OCRv4 model.
|
|
209
|
+
* Runs natively in Node.js via ONNX - no Python required.
|
|
210
|
+
*
|
|
211
|
+
* Requires: npm install @gutenye/ocr-node
|
|
212
|
+
*
|
|
213
|
+
* Performance (on construction drawings):
|
|
214
|
+
* - Speed: ~0.35s
|
|
215
|
+
* - Accuracy: ~75% with preprocessing (vs 50% without)
|
|
216
|
+
*
|
|
217
|
+
* @example
|
|
218
|
+
* ```typescript
|
|
219
|
+
* const ocr = new PaddleOCR();
|
|
220
|
+
* const result = await ocr.extractText('drawing.png');
|
|
221
|
+
* console.log(result.text);
|
|
222
|
+
* ```
|
|
223
|
+
*/
|
|
224
|
+
export declare class PaddleOCR implements OCRProvider {
|
|
225
|
+
private _ocr;
|
|
226
|
+
private _sharp;
|
|
227
|
+
private _preprocessing;
|
|
228
|
+
private _contrast;
|
|
229
|
+
constructor(config?: PaddleOCRConfig);
|
|
230
|
+
get name(): string;
|
|
231
|
+
private getOcr;
|
|
232
|
+
private getSharp;
|
|
233
|
+
private preprocessImage;
|
|
234
|
+
extractText(imagePath: string): Promise<OCRResult>;
|
|
235
|
+
extractTextBatch(imagePaths: string[]): Promise<OCRResult[]>;
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* Ensemble OCR configuration options.
|
|
239
|
+
*/
|
|
240
|
+
export interface EnsembleOCRConfig {
|
|
241
|
+
/** Contrast multiplier for preprocessing (default: 1.5) */
|
|
242
|
+
contrast?: number;
|
|
243
|
+
/** Enable sharpening (default: true) */
|
|
244
|
+
sharpen?: boolean;
|
|
245
|
+
}
|
|
246
|
+
/**
|
|
247
|
+
* Ensemble OCR - combines PaddleOCR + Tesseract for maximum accuracy.
|
|
248
|
+
*
|
|
249
|
+
* Achieves ~100% accuracy by running multiple OCR engines in parallel
|
|
250
|
+
* with different preprocessing, then merging results.
|
|
251
|
+
*
|
|
252
|
+
* Performance:
|
|
253
|
+
* - Accuracy: ~100% (vs 75% for PaddleOCR alone)
|
|
254
|
+
* - Speed: ~900ms (parallel execution)
|
|
255
|
+
*
|
|
256
|
+
* Requires: npm install @gutenye/ocr-node tesseract.js
|
|
257
|
+
*
|
|
258
|
+
* @example
|
|
259
|
+
* ```typescript
|
|
260
|
+
* const ocr = new EnsembleOCR();
|
|
261
|
+
* const result = await ocr.extractText('drawing.png');
|
|
262
|
+
* console.log(result.text); // Maximum accuracy
|
|
263
|
+
* ```
|
|
264
|
+
*/
|
|
265
|
+
export declare class EnsembleOCR implements OCRProvider {
|
|
266
|
+
private _paddleOcr;
|
|
267
|
+
private _tessWorker;
|
|
268
|
+
private _sharp;
|
|
269
|
+
private _contrast;
|
|
270
|
+
private _sharpen;
|
|
271
|
+
constructor(config?: EnsembleOCRConfig);
|
|
272
|
+
get name(): string;
|
|
273
|
+
private getPaddleOcr;
|
|
274
|
+
private getTessWorker;
|
|
275
|
+
private getSharp;
|
|
276
|
+
extractText(imagePath: string): Promise<OCRResult>;
|
|
277
|
+
terminate(): Promise<void>;
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* OCR provider type for factory function.
|
|
281
|
+
*/
|
|
282
|
+
export type OCRProviderType = 'ensemble' | 'paddle' | 'tesseract' | 'doctr' | 'easyocr';
|
|
283
|
+
/**
|
|
284
|
+
* Factory function to create an OCR provider.
|
|
285
|
+
*
|
|
286
|
+
* @param provider - Provider type: 'ensemble', 'paddle', 'tesseract', 'doctr', or 'easyocr'
|
|
287
|
+
* @param config - Provider-specific configuration
|
|
288
|
+
* @returns OCR provider instance
|
|
289
|
+
*
|
|
290
|
+
* @example
|
|
291
|
+
* ```typescript
|
|
292
|
+
* // PaddleOCR via ONNX (fast, recommended for Node.js)
|
|
293
|
+
* const ocr1 = getOCRProvider('paddle');
|
|
294
|
+
*
|
|
295
|
+
* // Tesseract.js (fallback)
|
|
296
|
+
* const ocr2 = getOCRProvider('tesseract');
|
|
297
|
+
*
|
|
298
|
+
* // docTR via Python (best accuracy: 85%)
|
|
299
|
+
* const ocr3 = getOCRProvider('doctr');
|
|
300
|
+
* ```
|
|
301
|
+
*/
|
|
302
|
+
export declare function getOCRProvider(provider?: OCRProviderType, config?: TesseractConfig | DocTRConfig | EasyOCRConfig | PaddleOCRConfig | EnsembleOCRConfig): OCRProvider;
|