@aspiresys/visor 1.1.0 → 1.1.2
This diff shows the changes between publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/ocr.js +2 -1
- package/dist/text.js +38 -19
- package/package.json +1 -1
package/dist/ocr.js
CHANGED
|
@@ -70,7 +70,8 @@ async function extractTextFromRegion(region) {
|
|
|
70
70
|
const result = await worker.recognize(processed, {}, {
|
|
71
71
|
blocks: true,
|
|
72
72
|
hocr: true,
|
|
73
|
-
tsv: true
|
|
73
|
+
tsv: true,
|
|
74
|
+
tessedit_char_whitelist: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.:/@ "
|
|
74
75
|
});
|
|
75
76
|
(0, logger_1.log)("[OCR] Extracted Text:");
|
|
76
77
|
(0, logger_1.log)(result.data.text);
|
package/dist/text.js
CHANGED
|
@@ -9,19 +9,29 @@ async function findText(text, region) {
|
|
|
9
9
|
if (!tsv) {
|
|
10
10
|
return null;
|
|
11
11
|
}
|
|
12
|
-
const
|
|
13
|
-
const
|
|
14
|
-
for (const
|
|
15
|
-
const cols =
|
|
16
|
-
if (cols[0] !== "5")
|
|
12
|
+
const rows = tsv.split("\n");
|
|
13
|
+
const lineGroups = {};
|
|
14
|
+
for (const row of rows) {
|
|
15
|
+
const cols = row.split("\t");
|
|
16
|
+
if (cols[0] !== "5") {
|
|
17
17
|
continue;
|
|
18
|
+
}
|
|
18
19
|
const wordText = cols[11];
|
|
19
|
-
if (!wordText)
|
|
20
|
+
if (!wordText) {
|
|
20
21
|
continue;
|
|
22
|
+
}
|
|
21
23
|
const confidence = Number(cols[10]);
|
|
22
|
-
if (confidence < 40)
|
|
24
|
+
if (confidence < 40) {
|
|
23
25
|
continue;
|
|
24
|
-
|
|
26
|
+
}
|
|
27
|
+
const blockNum = cols[2];
|
|
28
|
+
const parNum = cols[3];
|
|
29
|
+
const lineNum = cols[4];
|
|
30
|
+
const key = `${blockNum}-${parNum}-${lineNum}`;
|
|
31
|
+
if (!lineGroups[key]) {
|
|
32
|
+
lineGroups[key] = [];
|
|
33
|
+
}
|
|
34
|
+
lineGroups[key].push({
|
|
25
35
|
text: wordText,
|
|
26
36
|
x: Number(cols[6]),
|
|
27
37
|
y: Number(cols[7]),
|
|
@@ -30,18 +40,27 @@ async function findText(text, region) {
|
|
|
30
40
|
confidence
|
|
31
41
|
});
|
|
32
42
|
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
43
|
+
for (const key in lineGroups) {
|
|
44
|
+
const words = lineGroups[key];
|
|
45
|
+
words.sort((a, b) => a.x - b.x);
|
|
46
|
+
//const combinedText = words.map(w => w.text).join(" ");
|
|
47
|
+
const combinedText = words.map(w => w.text).join(" ").replace(/\s+/g, " ").trim();
|
|
48
|
+
if (combinedText.toLowerCase().replace(/[^a-z0-9 ]/gi, "").includes(text.toLowerCase().replace(/[^a-z0-9 ]/gi, ""))) {
|
|
49
|
+
const minX = Math.min(...words.map(w => w.x));
|
|
50
|
+
const minY = Math.min(...words.map(w => w.y));
|
|
51
|
+
const maxX = Math.max(...words.map(w => w.x + w.width));
|
|
52
|
+
const maxY = Math.max(...words.map(w => w.y + w.height));
|
|
53
|
+
const avgConfidence = words.reduce((sum, w) => sum + w.confidence, 0) / words.length;
|
|
54
|
+
return {
|
|
55
|
+
x: minX,
|
|
56
|
+
y: minY,
|
|
57
|
+
width: maxX - minX,
|
|
58
|
+
height: maxY - minY,
|
|
59
|
+
confidence: avgConfidence
|
|
60
|
+
};
|
|
61
|
+
}
|
|
37
62
|
}
|
|
38
|
-
return
|
|
39
|
-
x: match.x,
|
|
40
|
-
y: match.y,
|
|
41
|
-
width: match.width,
|
|
42
|
-
height: match.height,
|
|
43
|
-
confidence: match.confidence
|
|
44
|
-
};
|
|
63
|
+
return null;
|
|
45
64
|
}
|
|
46
65
|
async function existsText(text, region) {
|
|
47
66
|
const match = await findText(text, region);
|