@shenghuabi/knowledge 1.0.22 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ocr.mjs +52 -52
- package/ocr.mjs.map +1 -1
- package/package.json +4 -4
- package/worker/ocr/index.d.ts +1 -1
- package/worker/ocr.mjs +1 -1
- package/worker/ocr.mjs.map +2 -2
package/ocr.mjs
CHANGED
|
@@ -242,106 +242,106 @@ var ModelConfig = [
|
|
|
242
242
|
{
|
|
243
243
|
label: "简体中文",
|
|
244
244
|
key: "ch_mobile",
|
|
245
|
-
det: "det/ch_PP-
|
|
246
|
-
rec: "rec/ch_PP-
|
|
247
|
-
dict: "rec/ch_PP-
|
|
248
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
245
|
+
det: "det/ch_PP-OCRv4_det_mobile.onnx",
|
|
246
|
+
rec: "rec/ch_PP-OCRv4_rec_mobile.onnx",
|
|
247
|
+
dict: "rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt",
|
|
248
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
249
249
|
},
|
|
250
250
|
{
|
|
251
251
|
label: "简体中文(服务器)",
|
|
252
252
|
key: "ch_server",
|
|
253
|
-
det: "det/ch_PP-
|
|
254
|
-
rec: "rec/ch_PP-
|
|
255
|
-
dict: "rec/ch_PP-
|
|
256
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
253
|
+
det: "det/ch_PP-OCRv4_det_server.onnx",
|
|
254
|
+
rec: "rec/ch_PP-OCRv4_rec_server.onnx",
|
|
255
|
+
dict: "rec/ch_PP-OCRv4_rec_server/ppocr_keys_v1.txt",
|
|
256
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
257
257
|
},
|
|
258
258
|
{
|
|
259
259
|
label: "繁體中文",
|
|
260
260
|
key: "chinese_cht",
|
|
261
|
-
det: "det/ch_PP-
|
|
262
|
-
rec: "rec/chinese_cht_PP-
|
|
263
|
-
dict: "rec/chinese_cht_PP-
|
|
264
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
261
|
+
det: "det/ch_PP-OCRv4_det_server.onnx",
|
|
262
|
+
rec: "rec/chinese_cht_PP-OCRv3_rec_mobile.onnx",
|
|
263
|
+
dict: "rec/chinese_cht_PP-OCRv3_rec_mobile/chinese_cht_dict.txt",
|
|
264
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
265
265
|
},
|
|
266
266
|
{
|
|
267
267
|
label: "英文",
|
|
268
268
|
key: "en_mobile",
|
|
269
|
-
det: "det/en_PP-
|
|
270
|
-
rec: "rec/en_PP-
|
|
271
|
-
dict: "rec/en_PP-
|
|
272
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
269
|
+
det: "det/en_PP-OCRv3_det_mobile.onnx",
|
|
270
|
+
rec: "rec/en_PP-OCRv4_rec_mobile.onnx",
|
|
271
|
+
dict: "rec/en_PP-OCRv4_rec_mobile/en_dict.txt",
|
|
272
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
273
273
|
},
|
|
274
274
|
{
|
|
275
275
|
label: "阿拉伯文",
|
|
276
276
|
key: "ar_mobile",
|
|
277
|
-
det: "det/
|
|
278
|
-
rec: "rec/arabic_PP-
|
|
279
|
-
dict: "rec/arabic_PP-
|
|
280
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
277
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
278
|
+
rec: "rec/arabic_PP-OCRv4_rec_mobile.onnx",
|
|
279
|
+
dict: "rec/arabic_PP-OCRv4_rec_mobile/arabic_dict.txt",
|
|
280
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
281
281
|
},
|
|
282
282
|
{
|
|
283
283
|
label: "塞尔维亚文",
|
|
284
284
|
key: "cyrillic_mobile",
|
|
285
|
-
det: "det/
|
|
286
|
-
rec: "rec/cyrillic_PP-
|
|
287
|
-
dict: "rec/cyrillic_PP-
|
|
288
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
285
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
286
|
+
rec: "rec/cyrillic_PP-OCRv3_rec_mobile.onnx",
|
|
287
|
+
dict: "rec/cyrillic_PP-OCRv3_rec_mobile/cyrillic_dict.txt",
|
|
288
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
289
289
|
},
|
|
290
290
|
{
|
|
291
291
|
label: "梵文",
|
|
292
292
|
key: "devanagari_mobile",
|
|
293
|
-
det: "det/
|
|
294
|
-
rec: "rec/devanagari_PP-
|
|
295
|
-
dict: "rec/devanagari_PP-
|
|
296
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
293
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
294
|
+
rec: "rec/devanagari_PP-OCRv4_rec_mobile.onnx",
|
|
295
|
+
dict: "rec/devanagari_PP-OCRv4_rec_mobile/devanagari_dict.txt",
|
|
296
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
297
297
|
},
|
|
298
298
|
{
|
|
299
299
|
label: "日文",
|
|
300
300
|
key: "japan_mobile",
|
|
301
|
-
det: "det/
|
|
302
|
-
rec: "rec/japan_PP-
|
|
303
|
-
dict: "rec/japan_PP-
|
|
304
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
301
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
302
|
+
rec: "rec/japan_PP-OCRv4_rec_mobile.onnx",
|
|
303
|
+
dict: "rec/japan_PP-OCRv4_rec_mobile/japan_dict.txt",
|
|
304
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
305
305
|
},
|
|
306
306
|
{
|
|
307
307
|
label: "卡纳达语",
|
|
308
308
|
key: "ka_mobile",
|
|
309
|
-
det: "det/
|
|
310
|
-
rec: "rec/ka_PP-
|
|
311
|
-
dict: "rec/ka_PP-
|
|
312
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
309
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
310
|
+
rec: "rec/ka_PP-OCRv4_rec_mobile.onnx",
|
|
311
|
+
dict: "rec/ka_PP-OCRv4_rec_mobile/ka_dict.txt",
|
|
312
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
313
313
|
},
|
|
314
314
|
{
|
|
315
315
|
label: "韩文",
|
|
316
316
|
key: "korean_mobile",
|
|
317
|
-
det: "det/
|
|
318
|
-
rec: "rec/korean_PP-
|
|
319
|
-
dict: "rec/korean_PP-
|
|
320
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
317
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
318
|
+
rec: "rec/korean_PP-OCRv4_rec_mobile.onnx",
|
|
319
|
+
dict: "rec/korean_PP-OCRv4_rec_mobile/korean_dict.txt",
|
|
320
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
321
321
|
},
|
|
322
322
|
{
|
|
323
323
|
label: "拉丁文",
|
|
324
324
|
key: "latin_mobile",
|
|
325
|
-
det: "det/
|
|
326
|
-
rec: "rec/latin_PP-
|
|
327
|
-
dict: "rec/latin_PP-
|
|
328
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
325
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
326
|
+
rec: "rec/latin_PP-OCRv3_rec_mobile.onnx",
|
|
327
|
+
dict: "rec/latin_PP-OCRv3_rec_mobile/latin_dict.txt",
|
|
328
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
329
329
|
},
|
|
330
330
|
{
|
|
331
331
|
label: "泰米尔文",
|
|
332
332
|
key: "ta_mobile",
|
|
333
|
-
det: "det/
|
|
334
|
-
rec: "rec/ta_PP-
|
|
335
|
-
dict: "rec/ta_PP-
|
|
336
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
333
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
334
|
+
rec: "rec/ta_PP-OCRv4_rec_mobile.onnx",
|
|
335
|
+
dict: "rec/ta_PP-OCRv4_rec_mobile/ta_dict.txt",
|
|
336
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
337
337
|
},
|
|
338
338
|
{
|
|
339
339
|
label: "泰卢固文",
|
|
340
340
|
key: "te_mobile",
|
|
341
|
-
det: "det/
|
|
342
|
-
rec: "rec/te_PP-
|
|
343
|
-
dict: "rec/te_PP-
|
|
344
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
341
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
342
|
+
rec: "rec/te_PP-OCRv4_rec_mobile.onnx",
|
|
343
|
+
dict: "rec/te_PP-OCRv4_rec_mobile/te_dict.txt",
|
|
344
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
345
345
|
}
|
|
346
346
|
];
|
|
347
347
|
export {
|
package/ocr.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../packages/ocr/ocr.ts", "../packages/ocr/ImageRaw.ts", "../packages/ocr/FileUtils.ts", "../packages/image/convert.ts", "../packages/image/extract.ts", "../packages/image/image-metadata.ts", "../packages/ocr/model-config.ts"],
|
|
4
|
-
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\nimport { getImageMetadata } from './image-metadata';\nexport async function imageExtract(\n buffer: Buffer,\n position: sharp.Region,\n padding: number = 0,\n) {\n let metadata =await getImageMetadata(buffer);\n let left = Math.min(\n Math.max(Math.round(position.left - padding), 0),\n metadata.width,\n );\n let top = Math.min(\n Math.max(Math.round(position.top - padding), 0),\n metadata.height,\n );\n return sharp(buffer)\n .extract({\n left,\n top,\n width: Math.min(\n Math.max(Math.round(position.width + padding * 2), 0),\n metadata.width - left,\n ),\n height: Math.min(\n Math.max(Math.round(position.height + padding * 2), 0),\n metadata.height - top,\n ),\n })\n .toBuffer();\n}\n", "import sharp from 'sharp';\n\nexport function getImageMetadata(buffer: Buffer) {\n let instance = sharp(buffer);\n return instance.metadata();\n}\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_infer.onnx',\n rec: 'rec/en_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/en_PP-OCRv4_rec_infer/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_infer/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_infer/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_infer/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_infer/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_infer/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/te_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/te_PP-OCRv4_rec_infer/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n];\n"],
|
|
4
|
+
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\nimport { getImageMetadata } from './image-metadata';\nexport async function imageExtract(\n buffer: Buffer,\n position: sharp.Region,\n padding: number = 0,\n) {\n let metadata =await getImageMetadata(buffer);\n let left = Math.min(\n Math.max(Math.round(position.left - padding), 0),\n metadata.width,\n );\n let top = Math.min(\n Math.max(Math.round(position.top - padding), 0),\n metadata.height,\n );\n return sharp(buffer)\n .extract({\n left,\n top,\n width: Math.min(\n Math.max(Math.round(position.width + padding * 2), 0),\n metadata.width - left,\n ),\n height: Math.min(\n Math.max(Math.round(position.height + padding * 2), 0),\n metadata.height - top,\n ),\n })\n .toBuffer();\n}\n", "import sharp from 'sharp';\n\nexport function getImageMetadata(buffer: Buffer) {\n let instance = sharp(buffer);\n return instance.metadata();\n}\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_mobile.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_server.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_mobile/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/en_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/en_PP-OCRv4_rec_mobile/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_mobile/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_mobile/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_mobile/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_mobile/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_mobile/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_mobile/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_mobile/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_mobile/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/te_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/te_PP-OCRv4_rec_mobile/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n];\n"],
|
|
5
5
|
"mappings": ";AAAA,OAAO;AAAA,EAEL;AAAA,OACK;AACP,SAAS,2BAA2B;;;ACJpC,OAAO,cAAc;AACrB,SAAS,oBAAoB;AAE7B,OAAO,WAAW;AACX,IAAM,WAAN,MAAM,kBAAiB,aAAa;AAAA,EACzC;AAAA,EAEA,aAAa,KAAKA,OAAiC;AAEjD,WAAO,IAAI,UAAS,MAAM,WAAWA,KAAW,CAAC;AAAA,EACnD;AAAA,EAEA,YAAY,cAA4B;AACtC,UAAM,YAAY;AAClB,SAAK,SAAS,QAAQ,YAAY;AAAA,EACpC;AAAA,EAEA,MAAM,MAAMA,OAAc;AACxB,UAAM,MAAM,SAAS,QAAQA,KAAI,EAAE,MAAM,CAAC;AAC1C,WAAO,KAAK,OAAO,SAAS,GAA6B,EAAE,OAAOA,KAAI;AAAA,EACxE;AAAA,EAEA,MAAM,OAAO,MAAkB;AAC7B,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,OAAO;AAAA,QACjB,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,KAAK;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,QAAQ,YAAyB;AACrC,UAAM,MAAM;AAAA,oBACI,KAAK,KAAK,aAAa,KAAK,MAAM;AAAA,UAC5C,WACC,IAAI,CAAC,cAAc;AAClB,YAAM,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,UAAU;AACnC,aAAO,oBAAoB,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;AAAA,IACjG,CAAC,EACA,KAAK,IAAI,CAAC;AAAA;AAAA;AAGjB,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,UAAU,CAAC,EAAE,OAAO,OAAO,KAAK,GAAG,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,OAAOC,QAAoB;AAC/B,SAAK,SAASA;AACd,UAAM,SAAS,MAAM,WAAWA,MAAK;AACrC,SAAK,OAAO,OAAO;AACnB,SAAK,QAAQ,OAAO;AACpB,SAAK,SAAS,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,eAAe,WAAWA,QAAoB;AAC5C,QAAM,SAAS,MAAMA,OAAM,IAAI,EAAE,SAAS,EAAE,mBAAmB,KAAK,CAAC;AACrE,SAAO;AAAA,IACL,MAAM,OAAO;AAAA,IACb,OAAO,OAAO,KAAK;AAAA,IACnB,QAAQ,OAAO,KAAK;AAAA,EACtB;AACF;AACA,SAAS,QAAQ,cAA4B;AAC3C,SAAO,MAAM,aAAa,MAAM;AAAA,IAC9B,KAAK;AAAA,MACH,OAAO,aAAa;AAAA,MACpB,QAAQ,aAAa;AAAA,MACrB,UAAU;AAAA,IACZ;AAAA,EACF,CAAC;AACH;;;AC1EA,OAAO,QAAQ;AACf,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACd,IAAM,YAAN,cAAwB,cAAc;AAAA,EAC3C,aAAsB,KAAKC,WAAkB;AAC3C,WAAO,MAAM,GAAG,SAAS,KAAK,UAAUA,SAAQ,GAAG,MAAM;AAAA,EAC3D;AACF;;;AFAA,SAAS,wBAAwB;AACjC,OAAOC,SAAQ;AACf,SAAS,QAAAC,aAAY;;;AGTrB,YAAY,OAAO;AACnB,YAAYC,SAAQ;AACpB,SAAS,QAAAC,aAAY;AACrB,SAAS,0BAA0B;AACnC,OAAOC,YAAW;AAClB,OAAO,gBAAgB;AACvB,SAAS,cAAc;AAEvB,IAAM,0BAA0B;AAEhC,IAAM,cAAgB,QAAM;AAAA,EACxB;AAAA,IACE,SAAO;AAAA,IACP,QAAM,CAAC,UAAU,wBAAwB,KAAK,KAAK,CAAC;AAAA,IACpD,YAAU,CAAC,WAAW;AACtB,YAAM,SAAS,OAAO,MAAM,uBAAuB;AACnD,aAAO,IAAI;AAAA,QACT,OAAO,KAAK,OAAO,MAAM,OAAO,CAAC,EAAE,MAAM,GAAG,QAAQ;AAAA,MACtD;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACE;AAAA,IACE,SAAO;AAAA,IACP;AAAA,MAAU,CAACC,cAER,aAASF,MAAK,UAAUE,SAAQ,CAAC,EACjC,KAAK,CAAC,WAAW,IAAI,WAAW,MAAM,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA,EACE,OAAO,SAAmB,CAAC,UAAU,iBAAiB,UAAU,CAAC;AACrE,CAAC;AACD,eAAsB,eAAe,OAA4B;AAC/D,QAAM,SAAS,MAAQ,QAAM,aAAa,KAAK;AAC/C,SAAO;AACT;AAKA,eAAsB,aAAa,OAA4B;AAC7D,QAAM,SAAS,MAAM,eAAe,KAAK;AACzC,QAAM,OAAO,MAAM,mBAAmB,MAAM;AAC5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,UAAU;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,aAAa;AAC7B,UAAM,OAAO,OAAO,OAAO,KAAK,MAAM,CAAC;AACvC,UAAM,iBAAiB,KAAK;AAE5B,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;AACjD,YAAM,QAAQ,eAAe,CAAC;AAC9B,YAAM,OAAO,eAAe,IAAI,CAAC;AACjC,YAAM,QAAQ,eAAe,IAAI,CAAC;AAClC,YAAM,MAAM,eAAe,IAAI,CAAC;AAChC,qBAAe,CAAC,IAAI;AACpB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAK,KAAa,gBAAgB,QAAQ;AAAA,IAChE;AACA,UAAM,SAASD,OAAM,gBAAgB;AAAA,MACnC,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC,EAAE,YAAY,CAAC;AAChB,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,WAAW,MAAM,SAAS,gBAAgB,MAAM,SAAS,cAAc;AACrE,UAAM,OAAO,MAAM,WAAW;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,UAAM,SAASA,OAAM,KAAK,MAAM;AAAA,MAC9B,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC;AACD,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,OAAO;AACL,UAAM,SAASA,OAAM,MAAM;AAC3B,WAAO,EAAE,MAAM,KAAK,MAAM,KAAK,OAAO;AAAA,EACxC;AACF;;;ACnFA,OAAOE,YAAW;;;ACAlB,OAAOC,YAAW;;;ALWlB,YAAYC,QAAO;AACnB,IAAM,oBAAsB,UAAO;AAAA,EACjC,SAAW;AAAA,IACP;AAAA,MACE,SAAM;AAAA,QACJ;AAAA,UACE,UAAO;AAAA,UACP,aAAU,CAAC,UAAU;AAAA,YACrB,KAAK;AAAA,YACL,MAAM;AAAA,YACN,OAAO;AAAA,YACP,QAAQ;AAAA,UACV,EAAE;AAAA,QACJ;AAAA,QACE,UAAO;AAAA,UACP,MAAQ,UAAO;AAAA,UACf,OAAS,UAAO;AAAA,UAChB,KAAO,UAAO;AAAA,UACd,QAAU,UAAO;AAAA,QACnB,CAAC;AAAA,MACH,CAAC;AAAA,MACD,EAAE,KAAK,IAAI,OAAO,IAAI,MAAM,IAAI,QAAQ,GAAG;AAAA,IAC7C;AAAA,EACF;AAAA,EACA,YAAc,YAAW,SAAM,CAAG,QAAO,UAAO,CAAC,CAAC,CAAC,GAAG,IAAI;AAAA;AAE5D,CAAC;AAED,gBAAgB;AAAA,EACd;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AACjB,CAAC;AAED,eAAe,QAEb,OACA,UAA2B,CAAC,GAC5B;AACA,QAAM,iBAAmB,SAAM,mBAAmB,OAAO;AAGzD,MAAI,EAAE,IAAI,IAAI,MAAM,aAAa,KAAK;AACtC,QAAM,WAAW,MAAM,IAAI,SAAS;AACpC,QAAM,UAAU,KAAK,IAAI,SAAS,OAAQ,SAAS,MAAO;AAC1D,MAAI,UAAU,eAAe,YAAY;AACvC,QAAI,QAAQ,SAAS,QAAS,SAAS;AACvC,YAAQ,QAAQ,IAAI,IAAI,QAAQ;AAChC,UAAM,IAAI,OAAO;AAAA,MACf,OAAO,KAAK;AAAA,QACV,YAAY,SAAS,QACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,MACA,QAAQ,KAAK;AAAA,QACX,YAAY,SAAS,SACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,IACF,CAAC;AAAA,EACH;AACA,QAAM,IAAI,OAAO,EAAE,GAAG,eAAe,SAAS,YAAY,OAAO,CAAC;AAClE,QAAM,IAAI,YAAY,CAAC;AACvB,SAAO,KAAK,OAAO,GAAU;AAC/B;AACO,IAAM,MAAN,cAAkB,QAAQ;AAAA,EAC/B,aAAsB,OAAO,UAA8B,CAAC,GAAG;AAC7D,UAAM,MAAM,MAAM,QAAQ,OAAO,OAAO;AACxC,QAAI,QAAQ,gBAAgB;AAC1B,YAAMC,IAAG,MAAMC,MAAK,UAAU,QAAQ,cAAc,GAAG;AAAA,QACrD,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,IAAC,IAAY,UAAU,QAAQ,KAAK,GAAG;AACvC,WAAO;AAAA,EACT;AACF;;;AMzFO,IAAM,cAAc;AAAA,EACzB;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AACF;",
|
|
6
6
|
"names": ["path", "sharp", "filePath", "fs", "path", "fs", "path", "sharp", "filePath", "sharp", "sharp", "v", "fs", "path"]
|
|
7
7
|
}
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shenghuabi/knowledge",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.23",
|
|
4
4
|
"description": "知识库",
|
|
5
5
|
"author": "wszgrcy",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"peerDependencies": {
|
|
8
|
-
"@cyia/vfs2": "^1.
|
|
8
|
+
"@cyia/vfs2": "^1.5.2",
|
|
9
9
|
"handlebars": "^4.7.8",
|
|
10
10
|
"lru-cache": "^11.2.1",
|
|
11
11
|
"rfdc": "^1.4.1",
|
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
"html-to-text": "^9.0.5",
|
|
20
20
|
"fastq": "^1.19.1",
|
|
21
21
|
"sharp": "0.34.2",
|
|
22
|
-
"@cyia/dl": "^1.
|
|
23
|
-
"@cyia/external-call": "^1.
|
|
22
|
+
"@cyia/dl": "^1.5.2",
|
|
23
|
+
"@cyia/external-call": "^1.5.2"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"html-entities": "^2.6.0",
|
package/worker/ocr/index.d.ts
CHANGED
package/worker/ocr.mjs
CHANGED
|
@@ -9,7 +9,7 @@ var BaseUrl = "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/
|
|
|
9
9
|
var DictUrl = "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4";
|
|
10
10
|
async function init(ocrConfig) {
|
|
11
11
|
const messageCb = (message) => {
|
|
12
|
-
ocrConfig.port
|
|
12
|
+
ocrConfig.port?.postMessage({ type: "progress", message });
|
|
13
13
|
};
|
|
14
14
|
const modelConfig = ModelConfig.find((item) => item.key === ocrConfig.key);
|
|
15
15
|
const fs = createNormalizeVfs({ dir: ocrConfig.modelDir });
|
package/worker/ocr.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/ocr/index.ts"],
|
|
4
|
-
"sourcesContent": ["import { ImageAdjustType, Ocr, ModelConfig } from '@shenghuabi/knowledge/ocr';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\n// import * as ort from 'onnxruntime-node';\nimport { MessagePort } from 'worker_threads';\nimport { getUniqueObjectKey } from '@shenghuabi/knowledge/util';\nimport { downloadFile } from '@cyia/dl';\nlet key!: string;\nlet ocrInstance: ReturnType<(typeof Ocr)['create']>;\nconst BaseUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv4';\n\nconst DictUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4';\nasync function init(ocrConfig: {\n key: string;\n modelDir: string;\n port
|
|
5
|
-
"mappings": ";AAAA,SAA0B,KAAK,mBAAmB;AAClD,SAAS,oBAAoB,YAAY;AAGzC,SAAS,0BAA0B;AACnC,SAAS,oBAAoB;AAC7B,IAAI;AACJ,IAAI;AACJ,IAAM,UACJ;AAEF,IAAM,UACJ;AACF,eAAe,KAAK,WAIjB;AACD,QAAM,YAAY,CAAC,YAAiB;AAClC,cAAU,
|
|
4
|
+
"sourcesContent": ["import { ImageAdjustType, Ocr, ModelConfig } from '@shenghuabi/knowledge/ocr';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\n// import * as ort from 'onnxruntime-node';\nimport { MessagePort } from 'worker_threads';\nimport { getUniqueObjectKey } from '@shenghuabi/knowledge/util';\nimport { downloadFile } from '@cyia/dl';\nlet key!: string;\nlet ocrInstance: ReturnType<(typeof Ocr)['create']>;\nconst BaseUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv4';\n\nconst DictUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4';\nasync function init(ocrConfig: {\n key: string;\n modelDir: string;\n port?: MessagePort;\n}) {\n const messageCb = (message: any) => {\n ocrConfig.port?.postMessage({ type: 'progress', message });\n };\n const modelConfig = ModelConfig.find((item) => item.key === ocrConfig.key)!;\n const fs = createNormalizeVfs({ dir: ocrConfig.modelDir });\n // 自动下载模型\n const absDetectionPath = path.join(ocrConfig.modelDir, modelConfig.det);\n if (!(await fs.exists(modelConfig.det))) {\n await downloadFile(`${BaseUrl}/${modelConfig.det}`, {\n savePath: absDetectionPath,\n message: messageCb,\n });\n }\n const absRecognitionPath = path.join(ocrConfig.modelDir, modelConfig.rec);\n if (!(await fs.exists(modelConfig.rec))) {\n await downloadFile(`${BaseUrl}/${modelConfig.rec}`, {\n savePath: absRecognitionPath,\n message: messageCb,\n });\n }\n const absDictionaryPath = path.join(ocrConfig.modelDir, modelConfig.dict);\n if (!(await fs.exists(modelConfig.dict))) {\n await downloadFile(`${DictUrl}/${modelConfig.dict}`, {\n savePath: absDictionaryPath,\n message: messageCb,\n });\n }\n}\n// 改为init和convert\nasync function convert(input: {\n filePath: string | Uint8Array;\n ocrConfig: { key: string; modelDir: string; device?: 'dml' | 'cuda' | 'cpu' };\n options?: ImageAdjustType;\n}) {\n const inputKey = getUniqueObjectKey(input.ocrConfig);\n if (key !== inputKey) {\n const modelConfig = ModelConfig.find(\n (item) => item.key === input.ocrConfig.key,\n )!;\n // 自动下载模型\n const absDetectionPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.det,\n );\n const absRecognitionPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.rec,\n );\n const absDictionaryPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.dict,\n );\n ocrInstance = Ocr.create({\n onnxOptions: {\n executionProviders: input.ocrConfig.device\n ? [input.ocrConfig.device]\n : ['dml', 'cuda', 'cpu'],\n executionMode: 'parallel',\n },\n models: {\n detectionPath: absDetectionPath,\n recognitionPath: absRecognitionPath,\n dictionaryPath: absDictionaryPath,\n },\n });\n key = inputKey;\n }\n return (await ocrInstance).convert(input.filePath, input.options);\n}\n\nexport { init, convert };\n"],
|
|
5
|
+
"mappings": ";AAAA,SAA0B,KAAK,mBAAmB;AAClD,SAAS,oBAAoB,YAAY;AAGzC,SAAS,0BAA0B;AACnC,SAAS,oBAAoB;AAC7B,IAAI;AACJ,IAAI;AACJ,IAAM,UACJ;AAEF,IAAM,UACJ;AACF,eAAe,KAAK,WAIjB;AACD,QAAM,YAAY,CAAC,YAAiB;AAClC,cAAU,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,EAC3D;AACA,QAAM,cAAc,YAAY,KAAK,CAAC,SAAS,KAAK,QAAQ,UAAU,GAAG;AACzE,QAAM,KAAK,mBAAmB,EAAE,KAAK,UAAU,SAAS,CAAC;AAEzD,QAAM,mBAAmB,KAAK,KAAK,UAAU,UAAU,YAAY,GAAG;AACtE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,GAAG,GAAI;AACvC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,GAAG,IAAI;AAAA,MAClD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACA,QAAM,qBAAqB,KAAK,KAAK,UAAU,UAAU,YAAY,GAAG;AACxE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,GAAG,GAAI;AACvC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,GAAG,IAAI;AAAA,MAClD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACA,QAAM,oBAAoB,KAAK,KAAK,UAAU,UAAU,YAAY,IAAI;AACxE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,IAAI,GAAI;AACxC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,IAAI,IAAI;AAAA,MACnD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,eAAe,QAAQ,OAIpB;AACD,QAAM,WAAW,mBAAmB,MAAM,SAAS;AACnD,MAAI,QAAQ,UAAU;AACpB,UAAM,cAAc,YAAY;AAAA,MAC9B,CAAC,SAAS,KAAK,QAAQ,MAAM,UAAU;AAAA,IACzC;AAEA,UAAM,mBAAmB,KAAK;AAAA,MAC5B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,UAAM,qBAAqB,KAAK;AAAA,MAC9B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,UAAM,oBAAoB,KAAK;AAAA,MAC7B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,kBAAc,IAAI,OAAO;AAAA,MACvB,aAAa;AAAA,QACX,oBAAoB,MAAM,UAAU,SAChC,CAAC,MAAM,UAAU,MAAM,IACvB,CAAC,OAAO,QAAQ,KAAK;AAAA,QACzB,eAAe;AAAA,MACjB;AAAA,MACA,QAAQ;AAAA,QACN,eAAe;AAAA,QACf,iBAAiB;AAAA,QACjB,gBAAgB;AAAA,MAClB;AAAA,IACF,CAAC;AACD,UAAM;AAAA,EACR;AACA,UAAQ,MAAM,aAAa,QAAQ,MAAM,UAAU,MAAM,OAAO;AAClE;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|