scribe.js-ocr 0.3.0 → 0.3.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -196,6 +196,9 @@ export class ImageCache {
196
196
 
197
197
  const workersPromiseArr = range(0, scheduler.workers.length - 1).map(async (x) => {
198
198
  const w = scheduler.workers[x];
199
+
200
+ if (w.pdfDoc) await w.freeDocument(w.pdfDoc);
201
+
199
202
  // The ArrayBuffer is transferred to the worker, so a new one must be created for each worker.
200
203
  // const fileData = await file.arrayBuffer();
201
204
  const fileDataCopy = fileData.slice(0);
@@ -143,6 +143,8 @@ export async function exportData(format = 'txt', minValue = 0, maxValue = -1) {
143
143
  doc1: pdfOverlay, minpage: minValue, maxpage: maxValue, pagewidth: dimsLimit.width, pageheight: dimsLimit.height, humanReadable: opt.humanReadablePDF,
144
144
  });
145
145
  }
146
+
147
+ w.freeDocument(pdfOverlay);
146
148
  } else {
147
149
  const pdfStr = await writePdf(ocrDownload, minValue, maxValue, opt.displayMode, false, true, dimsLimit, opt.confThreshHigh, opt.confThreshMed,
148
150
  opt.overlayOpacity / 100);
@@ -169,6 +171,8 @@ export async function exportData(format = 'txt', minValue = 0, maxValue = -1) {
169
171
  content = await w.write({
170
172
  doc1: pdf, minpage: minValue, maxpage: maxValue, pagewidth: dimsLimit.width, pageheight: dimsLimit.height, humanReadable: opt.humanReadablePDF,
171
173
  });
174
+
175
+ w.freeDocument(pdf);
172
176
  }
173
177
  } else if (format === 'hocr') {
174
178
  content = writeHocr(ocrAll.active, minValue, maxValue);
@@ -90,7 +90,7 @@ export async function initMuPDFWorker() {
90
90
  return function (...args) {
91
91
  return new Promise((resolve, reject) => {
92
92
  // Add the PDF as the first argument for most functions
93
- if (!['openDocument', 'cleanFile'].includes(func)) {
93
+ if (!['openDocument', 'cleanFile', 'freeDocument'].includes(func)) {
94
94
  // Remove job number (appended by Tesseract scheduler function)
95
95
  // args = args.slice(0,-1)
96
96
 
@@ -165,6 +165,8 @@ mupdf.pageText = function (doc, {
165
165
 
166
166
  const content = Module.UTF8ToString(dataPtr);
167
167
 
168
+ Module._free(dataPtr);
169
+
168
170
  return {
169
171
  letterCountTotal,
170
172
  letterCountVis,
@@ -464,7 +466,7 @@ const handleMessage = (data) => {
464
466
  } catch (error) {
465
467
  parentPort.postMessage(['ERROR', id, { name: error.name, message: error.message }]);
466
468
  }
467
- }
469
+ };
468
470
 
469
471
  if (typeof process === 'undefined') {
470
472
  onmessage = (event) => handleMessage(event.data);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scribe.js-ocr",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "High-quality OCR and text extraction for images and PDFs.",
5
5
  "main": "scribe.js",
6
6
  "directories": {