@areb0s/ocr-common 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/build/Ocr.d.ts +15 -0
  2. package/build/Ocr.js +24 -0
  3. package/build/Ocr.js.map +1 -0
  4. package/build/backend/FileUtilsBase.d.ts +3 -0
  5. package/build/backend/FileUtilsBase.js +6 -0
  6. package/build/backend/FileUtilsBase.js.map +1 -0
  7. package/build/backend/ImageRawBase.d.ts +8 -0
  8. package/build/backend/ImageRawBase.js +14 -0
  9. package/build/backend/ImageRawBase.js.map +1 -0
  10. package/build/backend/backend.d.ts +14 -0
  11. package/build/backend/backend.js +14 -0
  12. package/build/backend/backend.js.map +1 -0
  13. package/build/backend/index.d.ts +3 -0
  14. package/build/backend/index.js +4 -0
  15. package/build/backend/index.js.map +1 -0
  16. package/build/backend/splitIntoLineImages.d.ts +2 -0
  17. package/build/backend/splitIntoLineImages.js +231 -0
  18. package/build/backend/splitIntoLineImages.js.map +1 -0
  19. package/build/index.d.ts +6 -0
  20. package/build/index.js +6 -0
  21. package/build/index.js.map +1 -0
  22. package/build/models/Detection.d.ts +13 -0
  23. package/build/models/Detection.js +82 -0
  24. package/build/models/Detection.js.map +1 -0
  25. package/build/models/ModelBase.d.ts +14 -0
  26. package/build/models/ModelBase.js +53 -0
  27. package/build/models/ModelBase.js.map +1 -0
  28. package/build/models/Recognition.d.ts +12 -0
  29. package/build/models/Recognition.js +190 -0
  30. package/build/models/Recognition.js.map +1 -0
  31. package/build/models/index.d.ts +2 -0
  32. package/build/models/index.js +3 -0
  33. package/build/models/index.js.map +1 -0
  34. package/build/types/index.d.ts +1 -0
  35. package/build/types/index.js +2 -0
  36. package/build/types/index.js.map +1 -0
  37. package/build/types/types.d.ts +67 -0
  38. package/build/types/types.js +5 -0
  39. package/build/types/types.js.map +1 -0
  40. package/package.json +34 -0
  41. package/src/Ocr.ts +34 -0
  42. package/src/backend/FileUtilsBase.ts +5 -0
  43. package/src/backend/ImageRawBase.ts +17 -0
  44. package/src/backend/backend.ts +29 -0
  45. package/src/backend/index.ts +3 -0
  46. package/src/backend/splitIntoLineImages.ts +296 -0
  47. package/src/index.ts +8 -0
  48. package/src/models/Detection.ts +96 -0
  49. package/src/models/ModelBase.ts +74 -0
  50. package/src/models/Recognition.ts +217 -0
  51. package/src/models/index.ts +2 -0
  52. package/src/types/global.d.ts +7 -0
  53. package/src/types/index.ts +1 -0
  54. package/src/types/types.ts +90 -0
  55. package/tsconfig.json +10 -0
@@ -0,0 +1,296 @@
1
+ import cv from '@techstark/opencv-js'
2
+ import clipper from 'js-clipper'
3
+ import { ImageRaw } from '#common/backend'
4
+ import type { LineImage, ImageRaw as ImageRawType } from '#common/types'
5
+
6
// Geometry aliases shared by the detection post-processing below:
// a 2D point, a 4-corner quadrilateral box, and a point list.
type pointType = [number, number]
type BoxType = [pointType, pointType, pointType, pointType]
type pointsType = pointType[]
9
+
10
+ export async function splitIntoLineImages(image: ImageRawType, sourceImage: ImageRawType): Promise<LineImage[]> {
11
+ const w = image.width
12
+ const h = image.height
13
+ const srcData = sourceImage
14
+
15
+ const edgeRect: { box: BoxType; image: ImageRawType }[] = []
16
+
17
+ const src = cvImread(image)
18
+
19
+ cv.cvtColor(src, src, cv.COLOR_RGBA2GRAY, 0)
20
+ const contours = new cv.MatVector()
21
+ const hierarchy = new cv.Mat()
22
+
23
+ cv.findContours(src, contours, hierarchy, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
24
+
25
+ for (let i = 0; i < contours.size(); i++) {
26
+ const minSize = 3
27
+ const cnt = contours.get(i)
28
+ const { points, sside } = getMiniBoxes(cnt)
29
+ if (sside < minSize) continue
30
+ // TODO sort fast
31
+
32
+ const clipBox = unclip(points)
33
+
34
+ const boxMap = cv.matFromArray(clipBox.length / 2, 1, cv.CV_32SC2, clipBox)
35
+
36
+ const resultObj = getMiniBoxes(boxMap)
37
+ const box = resultObj.points
38
+ if (resultObj.sside < minSize + 2) {
39
+ continue
40
+ }
41
+ function clip(n: number, min: number, max: number) {
42
+ return Math.max(min, Math.min(n, max))
43
+ }
44
+
45
+ const rx = srcData.width / w
46
+ const ry = srcData.height / h
47
+
48
+ for (let i = 0; i < box.length; i++) {
49
+ box[i][0] *= rx
50
+ box[i][1] *= ry
51
+ }
52
+
53
+ const box1 = orderPointsClockwise(box)
54
+ box1.forEach((item) => {
55
+ item[0] = clip(Math.round(item[0]), 0, srcData.width)
56
+ item[1] = clip(Math.round(item[1]), 0, srcData.height)
57
+ })
58
+ const rect_width = int(linalgNorm(box1[0], box1[1]))
59
+ const rect_height = int(linalgNorm(box1[0], box1[3]))
60
+ if (rect_width <= 3 || rect_height <= 3) continue
61
+
62
+ const c = getRotateCropImage(srcData, box)
63
+
64
+ edgeRect.push({
65
+ box,
66
+ image: c,
67
+ })
68
+ }
69
+
70
+ src.delete()
71
+ contours.delete()
72
+ hierarchy.delete()
73
+
74
+ return edgeRect
75
+ }
76
+
77
/**
 * Fit a minimum-area rotated rectangle around a contour and return its four
 * corners in clockwise order from top-left, plus the shorter side length
 * (`sside`) that callers use to filter out tiny boxes.
 */
function getMiniBoxes(contour: any) {
  const boundingBox = cv.minAreaRect(contour)
  // Corner points of the rotated rect, sorted left-to-right by x.
  const points = Array.from(boxPoints(boundingBox.center, boundingBox.size, boundingBox.angle)).sort(
    (a, b) => a[0] - b[0],
  ) as pointsType

  // Of the two leftmost points the one with smaller y is the top-left
  // corner; of the two rightmost points the one with smaller y is the
  // top-right corner (image y grows downward).
  let index_1 = 0,
    index_2 = 1,
    index_3 = 2,
    index_4 = 3
  if (points[1][1] > points[0][1]) {
    index_1 = 0
    index_4 = 1
  } else {
    index_1 = 1
    index_4 = 0
  }
  if (points[3][1] > points[2][1]) {
    index_2 = 2
    index_3 = 3
  } else {
    index_2 = 3
    index_3 = 2
  }

  // Clockwise order: top-left, top-right, bottom-right, bottom-left.
  const box = [points[index_1], points[index_2], points[index_3], points[index_4]] as BoxType
  const side = Math.min(boundingBox.size.height, boundingBox.size.width)
  return { points: box, sside: side }
}
106
+
107
+ function unclip(box: pointsType) {
108
+ const unclip_ratio = 1.5
109
+ const area = Math.abs(polygonPolygonArea(box))
110
+ const length = polygonPolygonLength(box)
111
+ const distance = (area * unclip_ratio) / length
112
+ const tmpArr: { X: number; Y: number }[] = []
113
+ box.forEach((item) => {
114
+ const obj = {
115
+ X: 0,
116
+ Y: 0,
117
+ }
118
+ obj.X = item[0]
119
+ obj.Y = item[1]
120
+ tmpArr.push(obj)
121
+ })
122
+ const offset = new clipper.ClipperOffset()
123
+ offset.AddPath(tmpArr, clipper.JoinType.jtRound, clipper.EndType.etClosedPolygon)
124
+ const expanded: { X: number; Y: number }[][] = []
125
+ offset.Execute(expanded, distance)
126
+ let expandedArr: pointsType = []
127
+ expanded[0] &&
128
+ expanded[0].forEach((item) => {
129
+ expandedArr.push([item.X, item.Y])
130
+ })
131
+ expandedArr = [].concat(...(<any>expandedArr))
132
+
133
+ return expandedArr
134
+ }
135
+
136
+ function orderPointsClockwise(pts: BoxType) {
137
+ const rect: BoxType = [
138
+ [0, 0],
139
+ [0, 0],
140
+ [0, 0],
141
+ [0, 0],
142
+ ]
143
+ const s = pts.map((pt) => pt[0] + pt[1])
144
+ rect[0] = pts[s.indexOf(Math.min(...s))]
145
+ rect[2] = pts[s.indexOf(Math.max(...s))]
146
+ const tmp = pts.filter((pt) => pt !== rect[0] && pt !== rect[2])
147
+ const diff = tmp[1].map((e, i) => e - tmp[0][i])
148
+ rect[1] = tmp[diff.indexOf(Math.min(...diff))]
149
+ rect[3] = tmp[diff.indexOf(Math.max(...diff))]
150
+ return rect
151
+ }
152
+
153
+ function linalgNorm(p0: pointType, p1: pointType) {
154
+ return Math.sqrt(Math.pow(p0[0] - p1[0], 2) + Math.pow(p0[1] - p1[1], 2))
155
+ }
156
+
157
+ function int(num: number) {
158
+ return num > 0 ? Math.floor(num) : Math.ceil(num)
159
+ }
160
+
161
+ function getRotateCropImage(imageRaw: ImageRawType, points: BoxType): ImageRawType {
162
+ const img_crop_width = int(Math.max(linalgNorm(points[0], points[1]), linalgNorm(points[2], points[3])))
163
+ const img_crop_height = int(Math.max(linalgNorm(points[0], points[3]), linalgNorm(points[1], points[2])))
164
+ const pts_std = [
165
+ [0, 0],
166
+ [img_crop_width, 0],
167
+ [img_crop_width, img_crop_height],
168
+ [0, img_crop_height],
169
+ ]
170
+
171
+ const srcTri = cv.matFromArray(4, 1, cv.CV_32FC2, flatten(points))
172
+ const dstTri = cv.matFromArray(4, 1, cv.CV_32FC2, flatten(pts_std))
173
+
174
+ // 获取到目标矩阵
175
+ const M = cv.getPerspectiveTransform(srcTri, dstTri)
176
+ const src = cvImread(imageRaw)
177
+ const dst = new cv.Mat()
178
+ const dsize = new cv.Size(img_crop_width, img_crop_height)
179
+ // 透视转换
180
+ cv.warpPerspective(src, dst, M, dsize, cv.INTER_CUBIC, cv.BORDER_REPLICATE, new cv.Scalar())
181
+
182
+ const dst_img_height = (<any>dst).matSize[0]
183
+ const dst_img_width = (<any>dst).matSize[1]
184
+ let dst_rot
185
+ // 图像旋转
186
+ if (dst_img_height / dst_img_width >= 1.5) {
187
+ dst_rot = new cv.Mat()
188
+ const dsize_rot = new cv.Size(dst.rows, dst.cols)
189
+ const center = new cv.Point(dst.cols / 2, dst.cols / 2)
190
+ const M = cv.getRotationMatrix2D(center, 90, 1)
191
+ cv.warpAffine(dst, dst_rot, M, dsize_rot, cv.INTER_CUBIC, cv.BORDER_REPLICATE, new cv.Scalar())
192
+ }
193
+
194
+ src.delete()
195
+ srcTri.delete()
196
+ dstTri.delete()
197
+ if (dst_rot) {
198
+ dst.delete()
199
+ }
200
+
201
+ return cvImshow(dst_rot || dst)
202
+ }
203
+
204
+ function boxPoints(center: { x: number; y: number }, size: { width: number; height: number }, angle: number) {
205
+ const width = size.width
206
+ const height = size.height
207
+
208
+ const theta = (angle * Math.PI) / 180.0
209
+ const cosTheta = Math.cos(theta)
210
+ const sinTheta = Math.sin(theta)
211
+
212
+ const cx = center.x
213
+ const cy = center.y
214
+
215
+ const dx = width * 0.5
216
+ const dy = height * 0.5
217
+
218
+ const rotatedPoints: any[] = []
219
+
220
+ // Top-Left
221
+ const x1 = cx - dx * cosTheta + dy * sinTheta
222
+ const y1 = cy - dx * sinTheta - dy * cosTheta
223
+ rotatedPoints.push([x1, y1])
224
+
225
+ // Top-Right
226
+ const x2 = cx + dx * cosTheta + dy * sinTheta
227
+ const y2 = cy + dx * sinTheta - dy * cosTheta
228
+ rotatedPoints.push([x2, y2])
229
+
230
+ // Bottom-Right
231
+ const x3 = cx + dx * cosTheta - dy * sinTheta
232
+ const y3 = cy + dx * sinTheta + dy * cosTheta
233
+ rotatedPoints.push([x3, y3])
234
+
235
+ // Bottom-Left
236
+ const x4 = cx - dx * cosTheta - dy * sinTheta
237
+ const y4 = cy - dx * sinTheta + dy * cosTheta
238
+ rotatedPoints.push([x4, y4])
239
+
240
+ return rotatedPoints
241
+ }
242
+
243
+ function polygonPolygonArea(polygon: pointsType) {
244
+ let i = -1,
245
+ n = polygon.length,
246
+ a: pointType,
247
+ b = polygon[n - 1],
248
+ area = 0
249
+
250
+ while (++i < n) {
251
+ a = b
252
+ b = polygon[i]
253
+ area += a[1] * b[0] - a[0] * b[1]
254
+ }
255
+
256
+ return area / 2
257
+ }
258
+
259
+ function polygonPolygonLength(polygon: pointsType) {
260
+ let i = -1,
261
+ n = polygon.length,
262
+ b = polygon[n - 1],
263
+ xa: number,
264
+ ya: number,
265
+ xb = b[0],
266
+ yb = b[1],
267
+ perimeter = 0
268
+
269
+ while (++i < n) {
270
+ xa = xb
271
+ ya = yb
272
+ b = polygon[i]
273
+ xb = b[0]
274
+ yb = b[1]
275
+ xa -= xb
276
+ ya -= yb
277
+ perimeter += Math.hypot(xa, ya)
278
+ }
279
+
280
+ return perimeter
281
+ }
282
+
283
+ function flatten(arr: number[] | number[][]) {
284
+ return arr
285
+ .toString()
286
+ .split(',')
287
+ .map((item) => +item)
288
+ }
289
+
290
// Wrap an ImageRaw (RGBA pixel buffer with width/height) in an OpenCV Mat.
// Kept as a function declaration: it is called above its definition and
// relies on hoisting.
function cvImread(image: ImageRawType) {
  return cv.matFromImageData(image)
}
293
+
294
// Convert an OpenCV Mat back into an ImageRaw pixel buffer.
// NOTE(review): assumes mat.data is a 4-channel RGBA buffer of
// cols * rows * 4 bytes — confirm against ImageRaw's constructor contract.
function cvImshow(mat: cv.Mat): ImageRawType {
  return new ImageRaw({ data: mat.data, width: mat.cols, height: mat.rows })
}
package/src/index.ts ADDED
@@ -0,0 +1,8 @@
1
+ import { Ocr } from './Ocr'
2
+
3
+ export default Ocr
4
+
5
+ export { registerBackend } from './backend/backend'
6
+ export * from './backend/FileUtilsBase'
7
+ export * from './backend/ImageRawBase'
8
+ export type * from './types'
@@ -0,0 +1,96 @@
1
+ import type { InferenceSession as InferenceSessionCommon, Tensor } from 'onnxruntime-common'
2
+ import invariant from 'tiny-invariant'
3
+ import { ImageRaw, InferenceSession, defaultModels, splitIntoLineImages } from '#common/backend'
4
+ import type { ImageRawData, BrowserImageInput, ImageRaw as ImageRawType, ModelCreateOptions, Size } from '#common/types'
5
+ import { ModelBase } from './ModelBase'
6
+
7
+ const BASE_SIZE = 32
8
+
9
/**
 * Text-detection model wrapper: finds text regions in an image and splits
 * it into per-line crops for the recognition stage.
 */
export class Detection extends ModelBase {
  /** Build a Detection instance from the configured ONNX detection model path. */
  static async create({ models, onnxOptions = {}, ...restOptions }: ModelCreateOptions) {
    const detectionPath = models?.detectionPath || defaultModels?.detectionPath
    invariant(detectionPath, 'detectionPath is required')
    const model = await InferenceSession.create(detectionPath, onnxOptions)
    return new Detection({ model, options: restOptions })
  }

  /**
   * Run detection on `input` and return the detected line images plus the
   * dimensions the image was resized to before inference.
   */
  async run(input: string | ImageRawData | BrowserImageInput, { onnxOptions = {} }: { onnxOptions?: InferenceSessionCommon.RunOptions } = {}) {
    // Use ImageRaw.from() factory method if available (browser), otherwise fallback to legacy handling
    const image = typeof (ImageRaw as any).from === 'function'
      ? await (ImageRaw as any).from(input)
      : typeof input === "string"
        ? await ImageRaw.open(input)
        : new ImageRaw(input as ImageRawData)

    // Resize image to multiple of 32
    // - image width and height must be a multiple of 32
    // - bigger image -> more accurate result, but takes longer time
    // inputImage = await Image.resize(image, multipleOfBaseSize(image, { maxSize: 960 }))
    const inputImage = await image.resize(multipleOfBaseSize(image))
    this.debugImage(inputImage, 'out1-multiple-of-base-size.jpg')

    // Convert image data to model data
    // - Using `(RGB / 255 - mean) / std` formula
    // - omitting reshapeOptions (mean/std) is more accurate; a run option could be created for them
    const modelData = this.imageToInput(inputImage, {
      // mean: [0.485, 0.456, 0.406],
      // std: [0.229, 0.224, 0.225],
    })

    // Run the model
    // console.time('Detection')
    const modelOutput = await this.runModel({ modelData, onnxOptions })
    // console.timeEnd('Detection')

    // Convert output data back to image data
    // - output values are probabilities from 0 to 1; above the threshold a pixel counts as text
    // - returns a black and white image
    // NOTE(review): the threshold here is 0.03, while the original comment
    // (and the common DB default) said 0.3 — confirm whether this low value
    // is intentional.
    const outputImage = outputToImage(modelOutput, 0.03)
    this.debugImage(outputImage, 'out2-black-white.jpg')

    // Find text boxes, split image into lines
    // - findContours from the image
    // - returns text boxes and line images
    const lineImages = await splitIntoLineImages(outputImage, inputImage)
    this.debugBoxImage(inputImage, lineImages, 'boxes.jpg')

    return {
      lineImages,
      resizedImageWidth: inputImage.width,
      resizedImageHeight: inputImage.height,
    }
  }
}
64
+
65
+ function multipleOfBaseSize(image: ImageRawType, { maxSize }: { maxSize?: number } = {}): Size {
66
+ let width = image.width
67
+ let height = image.height
68
+ if (maxSize && Math.max(width, height) > maxSize) {
69
+ const ratio = width > height ? maxSize / width : maxSize / height
70
+ width = width * ratio
71
+ height = height * ratio
72
+ }
73
+ const newWidth = Math.max(
74
+ // Math.round
75
+ // Math.ceil
76
+ Math.ceil(width / BASE_SIZE) * BASE_SIZE,
77
+ BASE_SIZE,
78
+ )
79
+ const newHeight = Math.max(Math.ceil(height / BASE_SIZE) * BASE_SIZE, BASE_SIZE)
80
+ return { width: newWidth, height: newHeight }
81
+ }
82
+
83
+ function outputToImage(output: Tensor, threshold: number): ImageRawType {
84
+ const height = output.dims[2]
85
+ const width = output.dims[3]
86
+ const data = new Uint8Array(width * height * 4)
87
+ for (const [outIndex, outValue] of output.data.entries()) {
88
+ const n = outIndex * 4
89
+ const value = (outValue as number) > threshold ? 255 : 0
90
+ data[n] = value // R
91
+ data[n + 1] = value // G
92
+ data[n + 2] = value // B
93
+ data[n + 3] = 255 // A
94
+ }
95
+ return new ImageRaw({ data, width, height })
96
+ }
@@ -0,0 +1,74 @@
1
+ import { type InferenceSession as InferenceSessionCommon, Tensor } from 'onnxruntime-common'
2
+ import type {
3
+ ImageRaw,
4
+ InferenceSession,
5
+ LineImage,
6
+ ModelBaseConstructorArg,
7
+ ModelBaseOptions,
8
+ ModelData,
9
+ ReshapeOptions,
10
+ } from '#common/types'
11
+
12
/**
 * Shared base for ONNX model wrappers: owns the InferenceSession, converts
 * images to planar float32 input tensors, and provides debug-image helpers.
 */
export class ModelBase {
  options: ModelBaseOptions
  // Underlying ONNX runtime session (runtime-enforced private field).
  #model: InferenceSession

  constructor({ model, options }: ModelBaseConstructorArg) {
    this.#model = model
    this.options = options
  }

  /** Feed `modelData` to the session's first input and return its first output tensor. */
  async runModel({
    modelData,
    onnxOptions = {},
  }: { modelData: ModelData; onnxOptions?: InferenceSessionCommon.RunOptions }) {
    const input = this.#prepareInput(modelData)
    const outputs = await this.#model.run(
      {
        [this.#model.inputNames[0]]: input,
      },
      onnxOptions,
    )
    const output = outputs[this.#model.outputNames[0]]
    return output
  }

  // Wrap the flat channel data in an NCHW float32 tensor (batch 1, 3 channels).
  #prepareInput(modelData: ModelData) {
    const input = Float32Array.from(modelData.data)
    return new Tensor('float32', input, [1, 3, modelData.height, modelData.width])
  }

  /**
   * Convert RGBA pixels to planar model input using `(channel/255 - mean) / std`.
   * Channel planes are concatenated in B, G, R order; alpha is dropped.
   */
  imageToInput(image: ImageRaw, { mean = [0, 0, 0], std = [1, 1, 1] }: ReshapeOptions): ModelData {
    const R: number[] = []
    const G: number[] = []
    const B: number[] = []
    for (let i = 0; i < image.data.length; i += 4) {
      R.push((image.data[i] / 255 - mean[0]) / std[0])
      G.push((image.data[i + 1] / 255 - mean[1]) / std[1])
      B.push((image.data[i + 2] / 255 - mean[2]) / std[2])
    }
    const newData = [...B, ...G, ...R]
    return {
      data: newData,
      width: image.width,
      height: image.height,
    }
  }

  /** Write `image` to the debug output directory when debugging is enabled; no-op otherwise. */
  debugImage(image: ImageRaw | any, path: string) {
    const { debugOutputDir, isDebug } = this.options
    if (!isDebug || !debugOutputDir) {
      return
    }
    image.write(`${debugOutputDir}/${path}`)
  }

  /** Draw the detected line boxes onto `sourceImage` and write it, when debugging is enabled. */
  async debugBoxImage(sourceImage: ImageRaw | any, lineImages: LineImage[], path: string) {
    const { debugOutputDir, isDebug } = this.options
    if (!isDebug || !debugOutputDir) {
      return
    }
    const boxImage = await sourceImage.drawBox(lineImages)
    boxImage.write(`${debugOutputDir}/${path}`)
  }
}
@@ -0,0 +1,217 @@
1
+ import type { InferenceSession as InferenceSessionCommon, Tensor } from 'onnxruntime-common'
2
+ import invariant from 'tiny-invariant'
3
+ import { FileUtils, InferenceSession, defaultModels } from '#common/backend'
4
+ import type { Dictionary, Line, LineImage, ModelBaseConstructorArg, ModelCreateOptions } from '#common/types'
5
+ import { ModelBase } from './ModelBase'
6
+
7
/**
 * Text-recognition model wrapper: turns the per-line crops produced by
 * Detection into text strings with confidence scores.
 */
export class Recognition extends ModelBase {
  // Character dictionary; model output index i maps to #dictionary[i - 1]
  // (index 0 is the blank token — see decode()).
  #dictionary: Dictionary

  /** Build a Recognition instance from the configured model and dictionary paths. */
  static async create({ models, onnxOptions = {}, ...restOptions }: ModelCreateOptions) {
    const recognitionPath = models?.recognitionPath || defaultModels?.recognitionPath
    invariant(recognitionPath, 'recognitionPath is required')
    const dictionaryPath = models?.dictionaryPath || defaultModels?.dictionaryPath
    invariant(dictionaryPath, 'dictionaryPath is required')
    const model = await InferenceSession.create(recognitionPath, onnxOptions)
    const dictionaryText = await FileUtils.read(dictionaryPath)
    // Append ' ' so the final dictionary slot decodes to a space character.
    const dictionary = [...dictionaryText.split('\n'), ' ']
    return new Recognition({ model, options: restOptions }, dictionary)
  }

  constructor(options: ModelBaseConstructorArg, dictionary: Dictionary) {
    super(options)
    this.#dictionary = dictionary
  }

  /** Recognize text in each line image and return merged, confidence-filtered lines. */
  async run(lineImages: LineImage[], { onnxOptions = {} }: { onnxOptions?: InferenceSessionCommon.RunOptions } = {}) {
    const modelDatas = await Promise.all(
      // Detect text from each line image
      lineImages.map(async (lineImage, index) => {
        // Resize Image to 48px height
        // - height must <= 48
        // - height: 48 is more accurate than 40, but same as 30
        const image = await (lineImage.image as any).resize({
          height: 48,
        })
        this.debugImage(lineImage.image, `out9-line-${index}.jpg`)
        this.debugImage(image, `out9-line-${index}-resized.jpg`)

        // transform image data to model data
        const modelData = this.imageToInput(image, {
          // mean: [0.5, 0.5, 0.5],
          // std: [0.5, 0.5, 0.5],
        })
        return modelData
      }),
    )

    const allLines: Line[] = []
    // console.time('Recognition')
    for (const modelData of modelDatas) {
      // Run model for each line image
      const output = await this.runModel({ modelData, onnxOptions })
      // use dictionary to decode output to text
      const lines = await this.decodeText(output)
      // unshift accumulates lines in reverse image order; calculateBox
      // relies on this when pairing each line back with its box.
      allLines.unshift(...lines)
    }
    // console.timeEnd('Recognition')
    const result = calculateBox({ lines: allLines, lineImages })
    return result
  }

  /**
   * Greedy CTC decode of the model output tensor: per batch entry, take
   * the argmax character index and probability for every timestep, then
   * map them to text via the dictionary.
   * NOTE(review): results are written into `line` from the end backwards
   * (`ml` counts down), so the returned array is in reverse batch order —
   * this pairs with the unshift in run() above.
   */
  decodeText(output: Tensor) {
    const data = output
    const predLen = data.dims[2]
    const line: Line[] = []
    let ml = data.dims[0] - 1
    for (let l = 0; l < data.data.length; l += predLen * data.dims[1]) {
      const predsIdx: number[] = []
      const predsProb: number[] = []

      // Argmax over each timestep's dictionary-probability slice.
      for (let i = l; i < l + predLen * data.dims[1]; i += predLen) {
        const tmpArr = data.data.slice(i, i + predLen) as Float32Array
        const tmpMax = tmpArr.reduce((a, b) => Math.max(a, b), Number.NEGATIVE_INFINITY)
        const tmpIdx = tmpArr.indexOf(tmpMax)
        predsProb.push(tmpMax)
        predsIdx.push(tmpIdx)
      }
      line[ml] = decode(this.#dictionary, predsIdx, predsProb, true)
      ml--
    }
    return line
  }
}
84
+
85
+ function decode(dictionary: string[], textIndex: number[], textProb: number[], isRemoveDuplicate: boolean) {
86
+ const ignoredTokens = [0]
87
+ const charList = []
88
+ const confList = []
89
+ for (let idx = 0; idx < textIndex.length; idx++) {
90
+ if (textIndex[idx] in ignoredTokens) {
91
+ continue
92
+ }
93
+ if (isRemoveDuplicate) {
94
+ if (idx > 0 && textIndex[idx - 1] === textIndex[idx]) {
95
+ continue
96
+ }
97
+ }
98
+ charList.push(dictionary[textIndex[idx] - 1])
99
+ if (textProb) {
100
+ confList.push(textProb[idx])
101
+ } else {
102
+ confList.push(1)
103
+ }
104
+ }
105
+ let text = ''
106
+ let mean = 0
107
+ if (charList.length) {
108
+ text = charList.join('')
109
+ let sum = 0
110
+ confList.forEach((item) => {
111
+ sum += item
112
+ })
113
+ mean = sum / confList.length
114
+ }
115
+ return { text, mean }
116
+ }
117
+
118
+ function calculateBox({
119
+ lines,
120
+ lineImages,
121
+ }: {
122
+ lines: Line[]
123
+ lineImages: LineImage[]
124
+ }) {
125
+ let mainLine = lines
126
+ const box = lineImages
127
+ for (const i in mainLine) {
128
+ const b = box[mainLine.length - Number(i) - 1].box
129
+ for (const p of b) {
130
+ p[0] = p[0]
131
+ p[1] = p[1]
132
+ }
133
+ mainLine[i]['box'] = b
134
+ }
135
+ mainLine = mainLine.filter((x) => x.mean >= 0.5)
136
+ mainLine = afAfRec(mainLine)
137
+ return mainLine
138
+ }
139
+
140
/**
 * Merge recognized lines that belong to the same visual text row.
 * Boxes are grouped by vertical midline, each group is read left-to-right,
 * texts are joined with spaces, and confidences are averaged.
 */
function afAfRec(lines: Line[]) {
  const outputLines: Line[] = []
  // Map each box (by object reference) back to the index of its line.
  const indexes: Map<BoxType, number> = new Map()
  for (const index in lines) {
    const box: any = lines[index].box
    indexes.set(box, Number(index))
  }

  const groupedBoxes = groupBoxesByMidlineDifference([...indexes.keys()])

  for (const boxes of groupedBoxes) {
    const texts = []
    let mean = 0
    for (const box of boxes) {
      const index = indexes.get(box)
      if (index === undefined) {
        continue
      }
      const line = lines[index]
      texts.push(line.text)
      mean += line.mean
    }
    // Merged box spans from the first (leftmost) box's left corners to the
    // last (rightmost) box's right corners.
    let outputBox = undefined
    if (boxes.at(0) && boxes.at(-1)) {
      outputBox = [boxes.at(0)![0], boxes.at(-1)![1], boxes.at(-1)![2], boxes.at(0)![3]]
    }
    outputLines.push({
      // NOTE(review): divides by boxes.length even though boxes missing
      // from `indexes` are skipped above — confirm that lookup never fails.
      mean: mean / boxes.length,
      text: texts.join(' '),
      box: outputBox,
    })
  }
  return outputLines
}
174
+
175
+ function calculateAverageHeight(boxes: BoxType[]): number {
176
+ let totalHeight = 0
177
+ for (const box of boxes) {
178
+ const [[, y1], , [, y2]] = box
179
+ const height = y2 - y1
180
+ totalHeight += height
181
+ }
182
+ return totalHeight / boxes.length
183
+ }
184
+
185
+ function groupBoxesByMidlineDifference(boxes: BoxType[]): BoxType[][] {
186
+ const averageHeight = calculateAverageHeight(boxes)
187
+ const result: BoxType[][] = []
188
+ for (const box of boxes) {
189
+ const [[, y1], , [, y2]] = box
190
+ const midline = (y1 + y2) / 2
191
+ const group = result.find((b) => {
192
+ const [[, groupY1], , [, groupY2]] = b[0]
193
+ const groupMidline = (groupY1 + groupY2) / 2
194
+ return Math.abs(groupMidline - midline) < averageHeight / 2
195
+ })
196
+ if (group) {
197
+ group.push(box)
198
+ } else {
199
+ result.push([box])
200
+ }
201
+ }
202
+
203
+ for (const group of result) {
204
+ group.sort((a, b) => {
205
+ const [ltA] = a
206
+ const [ltB] = b
207
+ return ltA[0] - ltB[0]
208
+ })
209
+ }
210
+
211
+ result.sort((a, b) => a[0][0][1] - b[0][0][1])
212
+
213
+ return result
214
+ }
215
+
216
// Geometry helpers local to this module: a 2D point and a
// 4-corner (quadrilateral) box.
type pointType = [number, number]
type BoxType = [pointType, pointType, pointType, pointType]
@@ -0,0 +1,2 @@
1
+ export * from './Detection'
2
+ export * from './Recognition'
@@ -0,0 +1,7 @@
1
+ declare module 'opencv.js'
2
+
3
+ declare module 'js-clipper'
4
+
5
+ declare module 'canvas'
6
+
7
+ declare module 'jsdom'
@@ -0,0 +1 @@
1
+ export * from './types'