@monolith-forensics/monolith-ui 1.9.3-dev.1 → 1.9.3-dev.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -34,6 +34,19 @@ const readFileAsDataUrl = (file) => {
|
|
|
34
34
|
reader.readAsDataURL(file);
|
|
35
35
|
});
|
|
36
36
|
};
|
|
37
|
+
const getImageExtensionFromMimeType = (mimeType) => {
|
|
38
|
+
var _a;
|
|
39
|
+
const subtype = (_a = mimeType.toLowerCase().split("/")[1]) === null || _a === void 0 ? void 0 : _a.split(";")[0];
|
|
40
|
+
if (!subtype)
|
|
41
|
+
return "png";
|
|
42
|
+
if (subtype === "jpeg" || subtype === "pjpeg")
|
|
43
|
+
return "jpg";
|
|
44
|
+
if (subtype === "svg+xml")
|
|
45
|
+
return "svg";
|
|
46
|
+
if (subtype === "x-icon" || subtype === "vnd.microsoft.icon")
|
|
47
|
+
return "ico";
|
|
48
|
+
return subtype.replace(/[^a-z0-9]/g, "") || "png";
|
|
49
|
+
};
|
|
37
50
|
export const addImagePlaceholder = ({ view, id, pos, src, label = "Processing image...", }) => {
|
|
38
51
|
if (!uploadKey.getState(view.state))
|
|
39
52
|
return;
|
|
@@ -71,6 +84,7 @@ export const startImageUpload = (file, view, pos, handleImageUpload) => {
|
|
|
71
84
|
}
|
|
72
85
|
// A fresh object to act as the ID for this upload
|
|
73
86
|
const id = nanoid(25);
|
|
87
|
+
const uploadName = `${id}.${getImageExtensionFromMimeType(file.type)}`;
|
|
74
88
|
// Replace the selection with a placeholder
|
|
75
89
|
(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
76
90
|
let placeholderAdded = false;
|
|
@@ -92,7 +106,7 @@ export const startImageUpload = (file, view, pos, handleImageUpload) => {
|
|
|
92
106
|
const hashes = yield calculateFileHash(file);
|
|
93
107
|
const src = yield handleImageUpload({
|
|
94
108
|
file,
|
|
95
|
-
name:
|
|
109
|
+
name: uploadName,
|
|
96
110
|
id,
|
|
97
111
|
md5: hashes.md5Hash,
|
|
98
112
|
sha1: hashes.sha1Hash,
|
|
@@ -111,10 +125,10 @@ export const startImageUpload = (file, view, pos, handleImageUpload) => {
|
|
|
111
125
|
const imageSrc = typeof src === "object" ? previewSrc : src;
|
|
112
126
|
const node = view.state.schema.nodes.image.create({
|
|
113
127
|
src: imageSrc,
|
|
114
|
-
alt:
|
|
128
|
+
alt: uploadName,
|
|
115
129
|
"data-uuid": id,
|
|
116
130
|
crossorigin: "anonymous",
|
|
117
|
-
title: `Filename: ${
|
|
131
|
+
title: `Filename: ${uploadName}`,
|
|
118
132
|
});
|
|
119
133
|
const insertTransaction = view.state.tr
|
|
120
134
|
.replaceWith(placeholderPos, placeholderPos, node)
|
package/package.json
CHANGED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
/**
 * Progress update handed to the `onProgress` callback of `getImageTextContent`.
 * Both fields are forwarded unchanged from the OCR worker's logger messages.
 */
export type ImageOcrProgress = {
    /** Worker-reported description of the current step. */
    status: string;
    /** Worker-reported progress value (presumably a 0..1 fraction; confirm against the worker). */
    progress: number;
};
|
|
5
|
-
/** One recognized line of text returned as part of `ImageTextContent.lines`. */
export type ImageOcrLine = {
    /** Text of the line. */
    text: string;
    /** Confidence reported for the line; absent for lines rebuilt from plain text. */
    confidence?: number;
    /**
     * Bounding box of the line as two corner points (x0, y0) and (x1, y1).
     * Presumably pixel coordinates of the image given to the OCR engine; confirm.
     */
    bbox?: {
        x0: number;
        y0: number;
        x1: number;
        y1: number;
    };
};
|
|
15
|
-
/** Result of running OCR on an image with `getImageTextContent`. */
export type ImageTextContent = {
    /** Full recognized text. */
    text: string;
    /** Overall confidence reported by the OCR engine, when available. */
    confidence?: number;
    /** Language the OCR worker was created with. */
    language: string;
    /** Recognized lines, each with optional confidence and bounding box. */
    lines: ImageOcrLine[];
};
|
|
21
|
-
/**
 * Named page segmentation mode values accepted by
 * `ImageOcrParameters.tessedit_pageseg_mode`. Each value is the mode number
 * encoded as a string.
 */
export declare const ImageOcrPageSegModes: Readonly<{
    OSD_ONLY: "0";
    AUTO_OSD: "1";
    AUTO_ONLY: "2";
    AUTO: "3";
    SINGLE_COLUMN: "4";
    SINGLE_BLOCK_VERT_TEXT: "5";
    SINGLE_BLOCK: "6";
    SINGLE_LINE: "7";
    SINGLE_WORD: "8";
    CIRCLE_WORD: "9";
    SINGLE_CHAR: "10";
    SPARSE_TEXT: "11";
    SPARSE_TEXT_OSD: "12";
    RAW_LINE: "13";
}>;
|
|
37
|
-
/** Union of every value held by `ImageOcrPageSegModes`. */
export type ImageOcrPageSegMode =
    (typeof ImageOcrPageSegModes)[keyof typeof ImageOcrPageSegModes];
|
|
38
|
-
/**
 * Canvas-based preprocessing applied to the image before OCR.
 * Preprocessing only runs when `grayscale`, `contrast`, `threshold` or `scale`
 * is set; `maxDimension` on its own does not trigger it.
 */
export type ImageOcrPreprocessOptions = {
    /** Replace each pixel's RGB with its luminance. */
    grayscale?: boolean;
    /** Contrast setting; `1` leaves pixel values unchanged. */
    contrast?: number;
    /** Requested scale factor; values below 1 are treated as 1. */
    scale?: number;
    /** Binarization cutoff (clamped to 0..255); setting it also converts to luminance. */
    threshold?: number;
    /** Upper bound for the longest side of the processed image; defaults to 3000. */
    maxDimension?: number;
};
|
|
45
|
-
/** Rectangular region described by its top-left corner and size. */
export type ImageOcrRectangle = {
    left: number;
    top: number;
    width: number;
    height: number;
};
|
|
51
|
-
/**
 * Options forwarded as the second argument of the OCR worker's `recognize`
 * call. Their exact effect is defined by the OCR engine.
 */
export type ImageOcrRecognizeOptions = {
    /** Region to pass to the recognizer. */
    rectangle?: ImageOcrRectangle;
    /** Ask the recognizer to pick the rotation itself. */
    rotateAuto?: boolean;
    /** Explicit rotation, in radians. */
    rotateRadians?: number;
};
|
|
56
|
-
/**
 * Engine parameters handed to the OCR worker's `setParameters` call.
 * A few common parameters are typed explicitly; any other string-valued
 * parameter is accepted through the index signature.
 */
export type ImageOcrParameters = {
    /** Page segmentation mode; see `ImageOcrPageSegModes`. */
    tessedit_pageseg_mode?: ImageOcrPageSegMode;
    tessedit_char_whitelist?: string;
    tessedit_char_blacklist?: string;
    preserve_interword_spaces?: "0" | "1";
    user_defined_dpi?: string;
    [key: string]: string | undefined;
};
|
|
64
|
-
/**
 * Name of a built-in option preset. "document" and "driver-license" each
 * supply default preprocessing, recognition and engine parameters;
 * "default" supplies none.
 */
export type ImageOcrPreset =
    | "default"
    | "document"
    | "driver-license";
|
|
65
|
-
/** Options accepted by `getImageTextContent`. */
export type GetImageTextContentOptions = {
    /** OCR language passed to the worker; falls back to the preset's value, then "eng". */
    language?: string;
    /** Built-in defaults to start from; explicitly supplied options override them. */
    preset?: ImageOcrPreset;
    /** Invoked with each progress message emitted by the OCR worker. */
    onProgress?: (progress: ImageOcrProgress) => void;
    /** Image preprocessing settings, merged over the preset's values. */
    preprocess?: ImageOcrPreprocessOptions;
    /** Recognition options, merged over the preset's values. */
    recognizeOptions?: ImageOcrRecognizeOptions;
    /** Engine parameters, merged over the preset's values. */
    parameters?: ImageOcrParameters;
    /** Extra options forwarded to worker creation (asset locations). */
    workerOptions?: {
        workerPath?: string;
        corePath?: string;
        langPath?: string;
    };
};
|
|
78
|
-
/**
 * Runs OCR on an image file and resolves with the recognized text and lines.
 *
 * @param image File whose `type` starts with "image/".
 * @param options Optional preset, language, preprocessing and engine settings.
 * @returns The recognized text, overall confidence, language used and lines.
 * @throws Error when `image` is not an image file.
 */
export declare const getImageTextContent: (
    image: File,
    options?: GetImageTextContentOptions,
) => Promise<ImageTextContent>;
|
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
|
-
export const ImageOcrPageSegModes = {
|
|
11
|
-
OSD_ONLY: "0",
|
|
12
|
-
AUTO_OSD: "1",
|
|
13
|
-
AUTO_ONLY: "2",
|
|
14
|
-
AUTO: "3",
|
|
15
|
-
SINGLE_COLUMN: "4",
|
|
16
|
-
SINGLE_BLOCK_VERT_TEXT: "5",
|
|
17
|
-
SINGLE_BLOCK: "6",
|
|
18
|
-
SINGLE_LINE: "7",
|
|
19
|
-
SINGLE_WORD: "8",
|
|
20
|
-
CIRCLE_WORD: "9",
|
|
21
|
-
SINGLE_CHAR: "10",
|
|
22
|
-
SPARSE_TEXT: "11",
|
|
23
|
-
SPARSE_TEXT_OSD: "12",
|
|
24
|
-
RAW_LINE: "13",
|
|
25
|
-
};
|
|
26
|
-
// Defaults applied when the caller selects the "document" preset.
const defaultDocumentOptions = {
    // Grayscale, moderate contrast boost, 2x upscale capped at 2800px on the longest side.
    preprocess: { grayscale: true, contrast: 1.25, scale: 2, maxDimension: 2800 },
    recognizeOptions: { rotateAuto: true },
    parameters: {
        tessedit_pageseg_mode: ImageOcrPageSegModes.AUTO,
        preserve_interword_spaces: "1",
        user_defined_dpi: "300",
    },
};
|
|
42
|
-
// Defaults applied when the caller selects the "driver-license" preset.
const defaultDriverLicenseOptions = {
    // Grayscale, stronger contrast boost, 2.5x upscale capped at 3200px on the longest side.
    preprocess: { grayscale: true, contrast: 1.45, scale: 2.5, maxDimension: 3200 },
    recognizeOptions: { rotateAuto: true },
    parameters: {
        tessedit_pageseg_mode: ImageOcrPageSegModes.SPARSE_TEXT,
        preserve_interword_spaces: "1",
        user_defined_dpi: "300",
    },
};
|
|
58
|
-
const mergeOptions = (defaults, options) => {
|
|
59
|
-
var _a, _b;
|
|
60
|
-
return Object.assign(Object.assign(Object.assign({}, defaults), options), { language: (_b = (_a = options.language) !== null && _a !== void 0 ? _a : defaults.language) !== null && _b !== void 0 ? _b : "eng", preprocess: Object.assign(Object.assign({}, defaults.preprocess), options.preprocess), recognizeOptions: Object.assign(Object.assign({}, defaults.recognizeOptions), options.recognizeOptions), parameters: Object.assign(Object.assign({}, defaults.parameters), options.parameters), workerOptions: Object.assign(Object.assign({}, defaults.workerOptions), options.workerOptions) });
|
|
61
|
-
};
|
|
62
|
-
// Picks the defaults that belong to `options.preset` and merges the caller's
// options over them. Unknown or missing presets start from empty defaults.
const getResolvedOptions = (options) => {
    const { preset } = options;
    let presetDefaults;
    if (preset === "document") {
        presetDefaults = defaultDocumentOptions;
    }
    else if (preset === "driver-license") {
        presetDefaults = defaultDriverLicenseOptions;
    }
    else {
        presetDefaults = {};
    }
    return mergeOptions(presetDefaults, options);
};
|
|
72
|
-
// Builds line entries from plain text: one `{ text }` object per non-blank
// line, with surrounding whitespace removed. Accepts LF and CRLF line endings.
const getFallbackLines = (text) => {
    const entries = [];
    for (const rawLine of text.split(/\r?\n/)) {
        const trimmed = rawLine.trim();
        if (trimmed) {
            entries.push({ text: trimmed });
        }
    }
    return entries;
};
|
|
79
|
-
// Flattens the page's blocks -> paragraphs -> lines into a list of
// `{ text, confidence, bbox }` entries, skipping lines whose text is blank.
// When the page exposes no structured lines, the entries are rebuilt from
// the page's plain text instead.
const getLinesFromPage = (page) => {
    const blocks = page.blocks;
    const structuredLines = blocks == null
        ? []
        : blocks.flatMap((block) => block.paragraphs.flatMap((paragraph) => paragraph.lines));
    if (structuredLines.length === 0) {
        return getFallbackLines(page.text);
    }
    const entries = [];
    for (const line of structuredLines) {
        if (line.text.trim().length === 0) {
            continue;
        }
        let bbox;
        if (line.bbox) {
            // Copy only the corner coordinates so no other engine fields leak out.
            const { x0, y0, x1, y1 } = line.bbox;
            bbox = { x0, y0, x1, y1 };
        }
        entries.push({ text: line.text, confidence: line.confidence, bbox });
    }
    return entries;
};
|
|
100
|
-
/**
 * Decodes an image blob into a drawable source exposing `width`, `height`
 * and `close()`.
 *
 * Uses `createImageBitmap` when the environment provides it. Otherwise the
 * blob is decoded through an `<img>` element and painted onto a canvas, and
 * that canvas is returned with a `close()` method attached so callers can
 * treat both results the same way.
 *
 * @param image Blob or File containing the image data.
 * @returns Promise resolving to an ImageBitmap, or to a canvas with `close()`.
 * @throws Error when the fallback path cannot obtain a 2D canvas context.
 */
const loadImageBitmap = (image) => __awaiter(void 0, void 0, void 0, function* () {
    if (typeof createImageBitmap === "function") {
        return createImageBitmap(image);
    }
    const objectUrl = URL.createObjectURL(image);
    try {
        const img = new Image();
        img.decoding = "async";
        img.src = objectUrl;
        yield img.decode();
        const canvas = document.createElement("canvas");
        canvas.width = img.naturalWidth;
        canvas.height = img.naturalHeight;
        const context = canvas.getContext("2d");
        if (!context) {
            throw new Error("Canvas rendering is not available.");
        }
        context.drawImage(img, 0, 0);
        // This branch only runs when createImageBitmap is missing, so calling it
        // here would always throw. Hand back the canvas itself instead.
        return Object.assign(canvas, {
            close() {
                // Shrinking the canvas drops its pixel buffer.
                canvas.width = 0;
                canvas.height = 0;
            },
        });
    }
    finally {
        // The image is fully decoded by now, so the temporary URL can be released.
        URL.revokeObjectURL(objectUrl);
    }
});
|
|
124
|
-
// Tells whether any setting that requires redrawing the image is present:
// a truthy `grayscale`, `contrast` or `scale`, or any defined `threshold`
// (including 0). `maxDimension` alone does not count.
const hasPreprocessOptions = (preprocess) => {
    if (preprocess === null || preprocess === undefined) {
        return false;
    }
    if (preprocess.grayscale) {
        return true;
    }
    if (preprocess.contrast) {
        return true;
    }
    if (preprocess.threshold !== undefined) {
        return true;
    }
    return Boolean(preprocess.scale);
};
|
|
130
|
-
// Limits a number to the 0..255 range of a color channel (NaN stays NaN).
const clampByte = (value) => {
    const atLeastZero = Math.max(0, value);
    return Math.min(255, atLeastZero);
};
|
|
131
|
-
/**
 * Prepares an image for OCR by redrawing it on a canvas and adjusting pixels.
 *
 * Steps, each applied only when requested:
 *  1. Resize: the requested scale (never below 1) capped so the longest side
 *     does not exceed `maxDimension` (default 3000).
 *  2. Luminance conversion when `grayscale` is set or a `threshold` is given.
 *  3. Contrast adjustment around mid-gray when `contrast` is not 1.
 *  4. Binarization to pure black/white at `threshold`.
 *
 * @param image Blob or File to process.
 * @param preprocess Preprocessing settings; when none of the triggering
 *   settings is present the input is returned unchanged.
 * @returns Promise resolving to the original image or to a PNG blob.
 * @throws Error when no 2D canvas context is available or PNG encoding fails.
 */
const preprocessImage = (image, preprocess) => __awaiter(void 0, void 0, void 0, function* () {
    if (!hasPreprocessOptions(preprocess)) {
        return image;
    }
    const settings = preprocess != null ? preprocess : {};
    const requestedScale = settings.scale != null ? settings.scale : 1;
    const maxDimension = settings.maxDimension != null ? settings.maxDimension : 3000;
    const contrast = settings.contrast != null ? settings.contrast : 1;
    // Loop-invariant decisions, computed once instead of per pixel.
    const useLuminance = Boolean(settings.grayscale) || settings.threshold !== undefined;
    const threshold = settings.threshold !== undefined ? clampByte(settings.threshold) : undefined;
    // The factor formula divides by (259 - level): keep the level inside
    // [-255, 255] so large contrast values cannot hit the singularity or flip
    // the sign and invert the image.
    const contrastLevel = Math.max(-255, Math.min(255, 255 * contrast - 255));
    const contrastFactor = contrast === 1
        ? 1
        : (259 * (contrastLevel + 255)) / (255 * (259 - contrastLevel));
    const bitmap = yield loadImageBitmap(image);
    const largestSourceDimension = Math.max(bitmap.width, bitmap.height);
    const scale = Math.min(Math.max(requestedScale, 1), maxDimension / largestSourceDimension);
    const width = Math.max(1, Math.round(bitmap.width * scale));
    const height = Math.max(1, Math.round(bitmap.height * scale));
    const canvas = document.createElement("canvas");
    canvas.width = width;
    canvas.height = height;
    const context = canvas.getContext("2d", { willReadFrequently: true });
    if (!context) {
        // Release the decoded bitmap before bailing out.
        bitmap.close();
        throw new Error("Canvas rendering is not available.");
    }
    try {
        context.imageSmoothingEnabled = true;
        context.imageSmoothingQuality = "high";
        context.drawImage(bitmap, 0, 0, width, height);
    }
    finally {
        // The bitmap is no longer needed whether or not drawing succeeded.
        bitmap.close();
    }
    const imageData = context.getImageData(0, 0, width, height);
    const { data } = imageData;
    // Pixels are stored as RGBA quadruples; alpha (index + 3) is left untouched.
    for (let index = 0; index < data.length; index += 4) {
        let red = data[index];
        let green = data[index + 1];
        let blue = data[index + 2];
        if (useLuminance) {
            const luminance = 0.299 * red + 0.587 * green + 0.114 * blue;
            red = luminance;
            green = luminance;
            blue = luminance;
        }
        if (contrast !== 1) {
            red = clampByte(contrastFactor * (red - 128) + 128);
            green = clampByte(contrastFactor * (green - 128) + 128);
            blue = clampByte(contrastFactor * (blue - 128) + 128);
        }
        if (threshold !== undefined) {
            // Channels are equal here (luminance was applied), so red decides.
            const value = red >= threshold ? 255 : 0;
            red = value;
            green = value;
            blue = value;
        }
        data[index] = red;
        data[index + 1] = green;
        data[index + 2] = blue;
    }
    context.putImageData(imageData, 0, 0);
    return new Promise((resolve, reject) => {
        canvas.toBlob((blob) => {
            if (!blob) {
                reject(new Error("Unable to prepare image for OCR."));
                return;
            }
            resolve(blob);
        }, "image/png");
    });
});
|
|
195
|
-
/**
 * Runs OCR on an image file.
 *
 * Resolves the preset and caller options, loads "tesseract.js" on demand,
 * creates a worker for the chosen language, applies engine parameters,
 * preprocesses the image and recognizes it. The worker is terminated
 * afterwards whether recognition succeeded or failed.
 *
 * @param image File whose `type` starts with "image/".
 * @param options Optional settings; defaults to an empty object.
 * @returns Promise resolving to `{ text, confidence, language, lines }`.
 * @throws Error when `image` is not an image file.
 */
export const getImageTextContent = (image_1, ...args_1) => __awaiter(void 0, [image_1, ...args_1], void 0, function* (image, options = {}) {
    const isImageFile = image.type.startsWith("image/");
    if (!isImageFile) {
        throw new Error("getImageTextContent requires an image File.");
    }
    const resolved = getResolvedOptions(options);
    const { language, onProgress, parameters, preprocess, recognizeOptions, workerOptions } = resolved;
    const { createWorker } = yield import("tesseract.js");
    // Forward only the fields the public progress type exposes.
    const logger = ({ progress, status }) => {
        if (onProgress !== null && onProgress !== undefined) {
            onProgress({ progress, status });
        }
    };
    let worker;
    try {
        worker = yield createWorker(language, undefined, Object.assign({}, workerOptions, { logger }));
        const hasParameters = Boolean(parameters) && Object.keys(parameters).length > 0;
        if (hasParameters) {
            yield worker.setParameters(parameters);
        }
        const ocrImage = yield preprocessImage(image, preprocess);
        const { data } = yield worker.recognize(ocrImage, recognizeOptions, { text: true, blocks: true });
        return {
            text: data.text,
            confidence: data.confidence,
            language,
            lines: getLinesFromPage(data),
        };
    }
    finally {
        // `worker` is still unset when creation itself failed.
        yield (worker === null || worker === undefined ? undefined : worker.terminate());
    }
});
|