@shenghuabi/knowledge 1.0.22 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ocr.mjs +52 -52
- package/ocr.mjs.map +1 -1
- package/package.json +6 -6
- package/worker/custom-cache.d.ts +2 -19
- package/worker/ocr/index.d.ts +1 -1
- package/worker/ocr.mjs +1 -1
- package/worker/ocr.mjs.map +2 -2
- package/worker/reranker.mjs +15 -47
- package/worker/reranker.mjs.map +2 -2
- package/worker/text2vec.mjs +15 -47
- package/worker/text2vec.mjs.map +2 -2
package/ocr.mjs
CHANGED
|
@@ -242,106 +242,106 @@ var ModelConfig = [
|
|
|
242
242
|
{
|
|
243
243
|
label: "简体中文",
|
|
244
244
|
key: "ch_mobile",
|
|
245
|
-
det: "det/ch_PP-
|
|
246
|
-
rec: "rec/ch_PP-
|
|
247
|
-
dict: "rec/ch_PP-
|
|
248
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
245
|
+
det: "det/ch_PP-OCRv4_det_mobile.onnx",
|
|
246
|
+
rec: "rec/ch_PP-OCRv4_rec_mobile.onnx",
|
|
247
|
+
dict: "rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt",
|
|
248
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
249
249
|
},
|
|
250
250
|
{
|
|
251
251
|
label: "简体中文(服务器)",
|
|
252
252
|
key: "ch_server",
|
|
253
|
-
det: "det/ch_PP-
|
|
254
|
-
rec: "rec/ch_PP-
|
|
255
|
-
dict: "rec/ch_PP-
|
|
256
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
253
|
+
det: "det/ch_PP-OCRv4_det_server.onnx",
|
|
254
|
+
rec: "rec/ch_PP-OCRv4_rec_server.onnx",
|
|
255
|
+
dict: "rec/ch_PP-OCRv4_rec_server/ppocr_keys_v1.txt",
|
|
256
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
257
257
|
},
|
|
258
258
|
{
|
|
259
259
|
label: "繁體中文",
|
|
260
260
|
key: "chinese_cht",
|
|
261
|
-
det: "det/ch_PP-
|
|
262
|
-
rec: "rec/chinese_cht_PP-
|
|
263
|
-
dict: "rec/chinese_cht_PP-
|
|
264
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
261
|
+
det: "det/ch_PP-OCRv4_det_server.onnx",
|
|
262
|
+
rec: "rec/chinese_cht_PP-OCRv3_rec_mobile.onnx",
|
|
263
|
+
dict: "rec/chinese_cht_PP-OCRv3_rec_mobile/chinese_cht_dict.txt",
|
|
264
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
265
265
|
},
|
|
266
266
|
{
|
|
267
267
|
label: "英文",
|
|
268
268
|
key: "en_mobile",
|
|
269
|
-
det: "det/en_PP-
|
|
270
|
-
rec: "rec/en_PP-
|
|
271
|
-
dict: "rec/en_PP-
|
|
272
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
269
|
+
det: "det/en_PP-OCRv3_det_mobile.onnx",
|
|
270
|
+
rec: "rec/en_PP-OCRv4_rec_mobile.onnx",
|
|
271
|
+
dict: "rec/en_PP-OCRv4_rec_mobile/en_dict.txt",
|
|
272
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
273
273
|
},
|
|
274
274
|
{
|
|
275
275
|
label: "阿拉伯文",
|
|
276
276
|
key: "ar_mobile",
|
|
277
|
-
det: "det/
|
|
278
|
-
rec: "rec/arabic_PP-
|
|
279
|
-
dict: "rec/arabic_PP-
|
|
280
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
277
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
278
|
+
rec: "rec/arabic_PP-OCRv4_rec_mobile.onnx",
|
|
279
|
+
dict: "rec/arabic_PP-OCRv4_rec_mobile/arabic_dict.txt",
|
|
280
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
281
281
|
},
|
|
282
282
|
{
|
|
283
283
|
label: "塞尔维亚文",
|
|
284
284
|
key: "cyrillic_mobile",
|
|
285
|
-
det: "det/
|
|
286
|
-
rec: "rec/cyrillic_PP-
|
|
287
|
-
dict: "rec/cyrillic_PP-
|
|
288
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
285
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
286
|
+
rec: "rec/cyrillic_PP-OCRv3_rec_mobile.onnx",
|
|
287
|
+
dict: "rec/cyrillic_PP-OCRv3_rec_mobile/cyrillic_dict.txt",
|
|
288
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
289
289
|
},
|
|
290
290
|
{
|
|
291
291
|
label: "梵文",
|
|
292
292
|
key: "devanagari_mobile",
|
|
293
|
-
det: "det/
|
|
294
|
-
rec: "rec/devanagari_PP-
|
|
295
|
-
dict: "rec/devanagari_PP-
|
|
296
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
293
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
294
|
+
rec: "rec/devanagari_PP-OCRv4_rec_mobile.onnx",
|
|
295
|
+
dict: "rec/devanagari_PP-OCRv4_rec_mobile/devanagari_dict.txt",
|
|
296
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
297
297
|
},
|
|
298
298
|
{
|
|
299
299
|
label: "日文",
|
|
300
300
|
key: "japan_mobile",
|
|
301
|
-
det: "det/
|
|
302
|
-
rec: "rec/japan_PP-
|
|
303
|
-
dict: "rec/japan_PP-
|
|
304
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
301
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
302
|
+
rec: "rec/japan_PP-OCRv4_rec_mobile.onnx",
|
|
303
|
+
dict: "rec/japan_PP-OCRv4_rec_mobile/japan_dict.txt",
|
|
304
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
305
305
|
},
|
|
306
306
|
{
|
|
307
307
|
label: "卡纳达语",
|
|
308
308
|
key: "ka_mobile",
|
|
309
|
-
det: "det/
|
|
310
|
-
rec: "rec/ka_PP-
|
|
311
|
-
dict: "rec/ka_PP-
|
|
312
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
309
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
310
|
+
rec: "rec/ka_PP-OCRv4_rec_mobile.onnx",
|
|
311
|
+
dict: "rec/ka_PP-OCRv4_rec_mobile/ka_dict.txt",
|
|
312
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
313
313
|
},
|
|
314
314
|
{
|
|
315
315
|
label: "韩文",
|
|
316
316
|
key: "korean_mobile",
|
|
317
|
-
det: "det/
|
|
318
|
-
rec: "rec/korean_PP-
|
|
319
|
-
dict: "rec/korean_PP-
|
|
320
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
317
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
318
|
+
rec: "rec/korean_PP-OCRv4_rec_mobile.onnx",
|
|
319
|
+
dict: "rec/korean_PP-OCRv4_rec_mobile/korean_dict.txt",
|
|
320
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
321
321
|
},
|
|
322
322
|
{
|
|
323
323
|
label: "拉丁文",
|
|
324
324
|
key: "latin_mobile",
|
|
325
|
-
det: "det/
|
|
326
|
-
rec: "rec/latin_PP-
|
|
327
|
-
dict: "rec/latin_PP-
|
|
328
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
325
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
326
|
+
rec: "rec/latin_PP-OCRv3_rec_mobile.onnx",
|
|
327
|
+
dict: "rec/latin_PP-OCRv3_rec_mobile/latin_dict.txt",
|
|
328
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
329
329
|
},
|
|
330
330
|
{
|
|
331
331
|
label: "泰米尔文",
|
|
332
332
|
key: "ta_mobile",
|
|
333
|
-
det: "det/
|
|
334
|
-
rec: "rec/ta_PP-
|
|
335
|
-
dict: "rec/ta_PP-
|
|
336
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
333
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
334
|
+
rec: "rec/ta_PP-OCRv4_rec_mobile.onnx",
|
|
335
|
+
dict: "rec/ta_PP-OCRv4_rec_mobile/ta_dict.txt",
|
|
336
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
337
337
|
},
|
|
338
338
|
{
|
|
339
339
|
label: "泰卢固文",
|
|
340
340
|
key: "te_mobile",
|
|
341
|
-
det: "det/
|
|
342
|
-
rec: "rec/te_PP-
|
|
343
|
-
dict: "rec/te_PP-
|
|
344
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
341
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
342
|
+
rec: "rec/te_PP-OCRv4_rec_mobile.onnx",
|
|
343
|
+
dict: "rec/te_PP-OCRv4_rec_mobile/te_dict.txt",
|
|
344
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
345
345
|
}
|
|
346
346
|
];
|
|
347
347
|
export {
|
package/ocr.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../packages/ocr/ocr.ts", "../packages/ocr/ImageRaw.ts", "../packages/ocr/FileUtils.ts", "../packages/image/convert.ts", "../packages/image/extract.ts", "../packages/image/image-metadata.ts", "../packages/ocr/model-config.ts"],
|
|
4
|
-
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\nimport { getImageMetadata } from './image-metadata';\nexport async function imageExtract(\n buffer: Buffer,\n position: sharp.Region,\n padding: number = 0,\n) {\n let metadata =await getImageMetadata(buffer);\n let left = Math.min(\n Math.max(Math.round(position.left - padding), 0),\n metadata.width,\n );\n let top = Math.min(\n Math.max(Math.round(position.top - padding), 0),\n metadata.height,\n );\n return sharp(buffer)\n .extract({\n left,\n top,\n width: Math.min(\n Math.max(Math.round(position.width + padding * 2), 0),\n metadata.width - left,\n ),\n height: Math.min(\n Math.max(Math.round(position.height + padding * 2), 0),\n metadata.height - top,\n ),\n })\n .toBuffer();\n}\n", "import sharp from 'sharp';\n\nexport function getImageMetadata(buffer: Buffer) {\n let instance = sharp(buffer);\n return instance.metadata();\n}\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_infer.onnx',\n rec: 'rec/en_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/en_PP-OCRv4_rec_infer/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_infer/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_infer/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_infer/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_infer/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_infer/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/te_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/te_PP-OCRv4_rec_infer/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n];\n"],
|
|
4
|
+
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\nimport { getImageMetadata } from './image-metadata';\nexport async function imageExtract(\n buffer: Buffer,\n position: sharp.Region,\n padding: number = 0,\n) {\n let metadata =await getImageMetadata(buffer);\n let left = Math.min(\n Math.max(Math.round(position.left - padding), 0),\n metadata.width,\n );\n let top = Math.min(\n Math.max(Math.round(position.top - padding), 0),\n metadata.height,\n );\n return sharp(buffer)\n .extract({\n left,\n top,\n width: Math.min(\n Math.max(Math.round(position.width + padding * 2), 0),\n metadata.width - left,\n ),\n height: Math.min(\n Math.max(Math.round(position.height + padding * 2), 0),\n metadata.height - top,\n ),\n })\n .toBuffer();\n}\n", "import sharp from 'sharp';\n\nexport function getImageMetadata(buffer: Buffer) {\n let instance = sharp(buffer);\n return instance.metadata();\n}\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_mobile.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_server.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_mobile/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/en_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/en_PP-OCRv4_rec_mobile/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_mobile/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_mobile/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_mobile/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_mobile/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_mobile/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_mobile/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_mobile/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_mobile/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/te_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/te_PP-OCRv4_rec_mobile/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n];\n"],
|
|
5
5
|
"mappings": ";AAAA,OAAO;AAAA,EAEL;AAAA,OACK;AACP,SAAS,2BAA2B;;;ACJpC,OAAO,cAAc;AACrB,SAAS,oBAAoB;AAE7B,OAAO,WAAW;AACX,IAAM,WAAN,MAAM,kBAAiB,aAAa;AAAA,EACzC;AAAA,EAEA,aAAa,KAAKA,OAAiC;AAEjD,WAAO,IAAI,UAAS,MAAM,WAAWA,KAAW,CAAC;AAAA,EACnD;AAAA,EAEA,YAAY,cAA4B;AACtC,UAAM,YAAY;AAClB,SAAK,SAAS,QAAQ,YAAY;AAAA,EACpC;AAAA,EAEA,MAAM,MAAMA,OAAc;AACxB,UAAM,MAAM,SAAS,QAAQA,KAAI,EAAE,MAAM,CAAC;AAC1C,WAAO,KAAK,OAAO,SAAS,GAA6B,EAAE,OAAOA,KAAI;AAAA,EACxE;AAAA,EAEA,MAAM,OAAO,MAAkB;AAC7B,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,OAAO;AAAA,QACjB,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,KAAK;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,QAAQ,YAAyB;AACrC,UAAM,MAAM;AAAA,oBACI,KAAK,KAAK,aAAa,KAAK,MAAM;AAAA,UAC5C,WACC,IAAI,CAAC,cAAc;AAClB,YAAM,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,UAAU;AACnC,aAAO,oBAAoB,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;AAAA,IACjG,CAAC,EACA,KAAK,IAAI,CAAC;AAAA;AAAA;AAGjB,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,UAAU,CAAC,EAAE,OAAO,OAAO,KAAK,GAAG,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,OAAOC,QAAoB;AAC/B,SAAK,SAASA;AACd,UAAM,SAAS,MAAM,WAAWA,MAAK;AACrC,SAAK,OAAO,OAAO;AACnB,SAAK,QAAQ,OAAO;AACpB,SAAK,SAAS,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,eAAe,WAAWA,QAAoB;AAC5C,QAAM,SAAS,MAAMA,OAAM,IAAI,EAAE,SAAS,EAAE,mBAAmB,KAAK,CAAC;AACrE,SAAO;AAAA,IACL,MAAM,OAAO;AAAA,IACb,OAAO,OAAO,KAAK;AAAA,IACnB,QAAQ,OAAO,KAAK;AAAA,EACtB;AACF;AACA,SAAS,QAAQ,cAA4B;AAC3C,SAAO,MAAM,aAAa,MAAM;AAAA,IAC9B,KAAK;AAAA,MACH,OAAO,aAAa;AAAA,MACpB,QAAQ,aAAa;AAAA,MACrB,UAAU;AAAA,IACZ;AAAA,EACF,CAAC;AACH;;;AC1EA,OAAO,QAAQ;AACf,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACd,IAAM,YAAN,cAAwB,cAAc;AAAA,EAC3C,aAAsB,KAAKC,WAAkB;AAC3C,WAAO,MAAM,GAAG,SAAS,KAAK,UAAUA,SAAQ,GAAG,MAAM;AAAA,EAC3D;AACF;;;AFAA,SAAS,wBAAwB;AACjC,OAAOC,SAAQ;AACf,SAAS,QAAAC,aAAY;;;AGTrB,YAAY,OAAO;AACnB,YAAYC,SAAQ;AACpB,SAAS,QAAAC,aAAY;AACrB,SAAS,0BAA0B;AACnC,OAAOC,YAAW;AAClB,OAAO,gBAAgB;AACvB,SAAS,cAAc;AAEvB,IAAM,0BAA0B;AAEhC,IAAM,cAAgB,QAAM;AAAA,EACxB;AAAA,IACE,SAAO;AAAA,IACP,QAAM,CAAC,UAAU,wBAAwB,KAAK,KAAK,CAAC;AAAA,IACpD,YAAU,CAAC,WAAW;AACtB,YAAM,SAAS,OAAO,MAAM,uBAAuB;AACnD,aAAO,IAAI;AAAA,QACT,OAAO,KAAK,OAAO,MAAM,OAAO,CAAC,EAAE,MAAM,GAAG,QAAQ;AAAA,MACtD;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACE;AAAA,IACE,SAAO;AAAA,IACP;AAAA,MAAU,CAACC,cAER,aAASF,MAAK,UAAUE,SAAQ,CAAC,EACjC,KAAK,CAAC,WAAW,IAAI,WAAW,MAAM,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA,EACE,OAAO,SAAmB,CAAC,UAAU,iBAAiB,UAAU,CAAC;AACrE,CAAC;AACD,eAAsB,eAAe,OAA4B;AAC/D,QAAM,SAAS,MAAQ,QAAM,aAAa,KAAK;AAC/C,SAAO;AACT;AAKA,eAAsB,aAAa,OAA4B;AAC7D,QAAM,SAAS,MAAM,eAAe,KAAK;AACzC,QAAM,OAAO,MAAM,mBAAmB,MAAM;AAC5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,UAAU;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,aAAa;AAC7B,UAAM,OAAO,OAAO,OAAO,KAAK,MAAM,CAAC;AACvC,UAAM,iBAAiB,KAAK;AAE5B,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;AACjD,YAAM,QAAQ,eAAe,CAAC;AAC9B,YAAM,OAAO,eAAe,IAAI,CAAC;AACjC,YAAM,QAAQ,eAAe,IAAI,CAAC;AAClC,YAAM,MAAM,eAAe,IAAI,CAAC;AAChC,qBAAe,CAAC,IAAI;AACpB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAK,KAAa,gBAAgB,QAAQ;AAAA,IAChE;AACA,UAAM,SAASD,OAAM,gBAAgB;AAAA,MACnC,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC,EAAE,YAAY,CAAC;AAChB,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,WAAW,MAAM,SAAS,gBAAgB,MAAM,SAAS,cAAc;AACrE,UAAM,OAAO,MAAM,WAAW;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,UAAM,SAASA,OAAM,KAAK,MAAM;AAAA,MAC9B,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC;AACD,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,OAAO;AACL,UAAM,SAASA,OAAM,MAAM;AAC3B,WAAO,EAAE,MAAM,KAAK,MAAM,KAAK,OAAO;AAAA,EACxC;AACF;;;ACnFA,OAAOE,YAAW;;;ACAlB,OAAOC,YAAW;;;ALWlB,YAAYC,QAAO;AACnB,IAAM,oBAAsB,UAAO;AAAA,EACjC,SAAW;AAAA,IACP;AAAA,MACE,SAAM;AAAA,QACJ;AAAA,UACE,UAAO;AAAA,UACP,aAAU,CAAC,UAAU;AAAA,YACrB,KAAK;AAAA,YACL,MAAM;AAAA,YACN,OAAO;AAAA,YACP,QAAQ;AAAA,UACV,EAAE;AAAA,QACJ;AAAA,QACE,UAAO;AAAA,UACP,MAAQ,UAAO;AAAA,UACf,OAAS,UAAO;AAAA,UAChB,KAAO,UAAO;AAAA,UACd,QAAU,UAAO;AAAA,QACnB,CAAC;AAAA,MACH,CAAC;AAAA,MACD,EAAE,KAAK,IAAI,OAAO,IAAI,MAAM,IAAI,QAAQ,GAAG;AAAA,IAC7C;AAAA,EACF;AAAA,EACA,YAAc,YAAW,SAAM,CAAG,QAAO,UAAO,CAAC,CAAC,CAAC,GAAG,IAAI;AAAA;AAE5D,CAAC;AAED,gBAAgB;AAAA,EACd;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AACjB,CAAC;AAED,eAAe,QAEb,OACA,UAA2B,CAAC,GAC5B;AACA,QAAM,iBAAmB,SAAM,mBAAmB,OAAO;AAGzD,MAAI,EAAE,IAAI,IAAI,MAAM,aAAa,KAAK;AACtC,QAAM,WAAW,MAAM,IAAI,SAAS;AACpC,QAAM,UAAU,KAAK,IAAI,SAAS,OAAQ,SAAS,MAAO;AAC1D,MAAI,UAAU,eAAe,YAAY;AACvC,QAAI,QAAQ,SAAS,QAAS,SAAS;AACvC,YAAQ,QAAQ,IAAI,IAAI,QAAQ;AAChC,UAAM,IAAI,OAAO;AAAA,MACf,OAAO,KAAK;AAAA,QACV,YAAY,SAAS,QACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,MACA,QAAQ,KAAK;AAAA,QACX,YAAY,SAAS,SACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,IACF,CAAC;AAAA,EACH;AACA,QAAM,IAAI,OAAO,EAAE,GAAG,eAAe,SAAS,YAAY,OAAO,CAAC;AAClE,QAAM,IAAI,YAAY,CAAC;AACvB,SAAO,KAAK,OAAO,GAAU;AAC/B;AACO,IAAM,MAAN,cAAkB,QAAQ;AAAA,EAC/B,aAAsB,OAAO,UAA8B,CAAC,GAAG;AAC7D,UAAM,MAAM,MAAM,QAAQ,OAAO,OAAO;AACxC,QAAI,QAAQ,gBAAgB;AAC1B,YAAMC,IAAG,MAAMC,MAAK,UAAU,QAAQ,cAAc,GAAG;AAAA,QACrD,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,IAAC,IAAY,UAAU,QAAQ,KAAK,GAAG;AACvC,WAAO;AAAA,EACT;AACF;;;AMzFO,IAAM,cAAc;AAAA,EACzB;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AACF;",
|
|
6
6
|
"names": ["path", "sharp", "filePath", "fs", "path", "fs", "path", "sharp", "filePath", "sharp", "sharp", "v", "fs", "path"]
|
|
7
7
|
}
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shenghuabi/knowledge",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.24",
|
|
4
4
|
"description": "知识库",
|
|
5
5
|
"author": "wszgrcy",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"peerDependencies": {
|
|
8
|
-
"@cyia/vfs2": "^1.
|
|
8
|
+
"@cyia/vfs2": "^1.5.3",
|
|
9
9
|
"handlebars": "^4.7.8",
|
|
10
10
|
"lru-cache": "^11.2.1",
|
|
11
11
|
"rfdc": "^1.4.1",
|
|
@@ -19,14 +19,14 @@
|
|
|
19
19
|
"html-to-text": "^9.0.5",
|
|
20
20
|
"fastq": "^1.19.1",
|
|
21
21
|
"sharp": "0.34.2",
|
|
22
|
-
"@cyia/dl": "^1.
|
|
23
|
-
"@cyia/external-call": "^1.
|
|
22
|
+
"@cyia/dl": "^1.5.3",
|
|
23
|
+
"@cyia/external-call": "^1.5.3"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"html-entities": "^2.6.0",
|
|
27
27
|
"@qdrant/qdrant-js": "1.15.1",
|
|
28
28
|
"@cyia/mdict-reader": "^1.0.9",
|
|
29
|
-
"@langchain/community": "
|
|
29
|
+
"@langchain/community": "1.1.1",
|
|
30
30
|
"@langchain/core": "1.1.8",
|
|
31
31
|
"@langchain/textsplitters": "^1.0.1",
|
|
32
32
|
"@xhmikosr/decompress-tarbz2": "^8.0.2",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"@gutenye/ocr-common": "^1.4.8",
|
|
48
48
|
"bmp-js": "^0.1.0",
|
|
49
49
|
"onnxruntime-node": "1.20.1",
|
|
50
|
-
"@huggingface/transformers": "
|
|
50
|
+
"@huggingface/transformers": "4.2.0",
|
|
51
51
|
"xlsx": "^0.18.5",
|
|
52
52
|
"pdfjs-dist": "^5.4.449"
|
|
53
53
|
},
|
package/worker/custom-cache.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
1
|
import { InitOptions } from './set-transformers-config';
|
|
3
2
|
export interface NodeProxy {
|
|
4
3
|
match: (request: string) => Promise<ArrayBuffer | undefined>;
|
|
@@ -7,22 +6,6 @@ export interface NodeProxy {
|
|
|
7
6
|
export declare class FileProxyCache {
|
|
8
7
|
#private;
|
|
9
8
|
constructor(initOptions: InitOptions);
|
|
10
|
-
match(request: string): Promise<
|
|
11
|
-
put(request: string, response: Response
|
|
9
|
+
match(request: string): Promise<Response | undefined>;
|
|
10
|
+
put(request: string, response: Response): Promise<void>;
|
|
12
11
|
}
|
|
13
|
-
declare class FileResponse {
|
|
14
|
-
filePath: string;
|
|
15
|
-
headers: import("undici-types").Headers;
|
|
16
|
-
exists: boolean;
|
|
17
|
-
status: number;
|
|
18
|
-
statusText: string;
|
|
19
|
-
body: fs.ReadStream;
|
|
20
|
-
constructor(filePath: string);
|
|
21
|
-
updateContentType(): void;
|
|
22
|
-
clone(): FileResponse;
|
|
23
|
-
arrayBuffer(): Promise<ArrayBuffer>;
|
|
24
|
-
blob(): Promise<Blob>;
|
|
25
|
-
text(): Promise<string>;
|
|
26
|
-
json(): Promise<object>;
|
|
27
|
-
}
|
|
28
|
-
export {};
|
package/worker/ocr/index.d.ts
CHANGED
package/worker/ocr.mjs
CHANGED
|
@@ -9,7 +9,7 @@ var BaseUrl = "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/
|
|
|
9
9
|
var DictUrl = "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4";
|
|
10
10
|
async function init(ocrConfig) {
|
|
11
11
|
const messageCb = (message) => {
|
|
12
|
-
ocrConfig.port
|
|
12
|
+
ocrConfig.port?.postMessage({ type: "progress", message });
|
|
13
13
|
};
|
|
14
14
|
const modelConfig = ModelConfig.find((item) => item.key === ocrConfig.key);
|
|
15
15
|
const fs = createNormalizeVfs({ dir: ocrConfig.modelDir });
|
package/worker/ocr.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/ocr/index.ts"],
|
|
4
|
-
"sourcesContent": ["import { ImageAdjustType, Ocr, ModelConfig } from '@shenghuabi/knowledge/ocr';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\n// import * as ort from 'onnxruntime-node';\nimport { MessagePort } from 'worker_threads';\nimport { getUniqueObjectKey } from '@shenghuabi/knowledge/util';\nimport { downloadFile } from '@cyia/dl';\nlet key!: string;\nlet ocrInstance: ReturnType<(typeof Ocr)['create']>;\nconst BaseUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv4';\n\nconst DictUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4';\nasync function init(ocrConfig: {\n key: string;\n modelDir: string;\n port
|
|
5
|
-
"mappings": ";AAAA,SAA0B,KAAK,mBAAmB;AAClD,SAAS,oBAAoB,YAAY;AAGzC,SAAS,0BAA0B;AACnC,SAAS,oBAAoB;AAC7B,IAAI;AACJ,IAAI;AACJ,IAAM,UACJ;AAEF,IAAM,UACJ;AACF,eAAe,KAAK,WAIjB;AACD,QAAM,YAAY,CAAC,YAAiB;AAClC,cAAU,
|
|
4
|
+
"sourcesContent": ["import { ImageAdjustType, Ocr, ModelConfig } from '@shenghuabi/knowledge/ocr';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\n// import * as ort from 'onnxruntime-node';\nimport { MessagePort } from 'worker_threads';\nimport { getUniqueObjectKey } from '@shenghuabi/knowledge/util';\nimport { downloadFile } from '@cyia/dl';\nlet key!: string;\nlet ocrInstance: ReturnType<(typeof Ocr)['create']>;\nconst BaseUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv4';\n\nconst DictUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4';\nasync function init(ocrConfig: {\n key: string;\n modelDir: string;\n port?: MessagePort;\n}) {\n const messageCb = (message: any) => {\n ocrConfig.port?.postMessage({ type: 'progress', message });\n };\n const modelConfig = ModelConfig.find((item) => item.key === ocrConfig.key)!;\n const fs = createNormalizeVfs({ dir: ocrConfig.modelDir });\n // 自动下载模型\n const absDetectionPath = path.join(ocrConfig.modelDir, modelConfig.det);\n if (!(await fs.exists(modelConfig.det))) {\n await downloadFile(`${BaseUrl}/${modelConfig.det}`, {\n savePath: absDetectionPath,\n message: messageCb,\n });\n }\n const absRecognitionPath = path.join(ocrConfig.modelDir, modelConfig.rec);\n if (!(await fs.exists(modelConfig.rec))) {\n await downloadFile(`${BaseUrl}/${modelConfig.rec}`, {\n savePath: absRecognitionPath,\n message: messageCb,\n });\n }\n const absDictionaryPath = path.join(ocrConfig.modelDir, modelConfig.dict);\n if (!(await fs.exists(modelConfig.dict))) {\n await downloadFile(`${DictUrl}/${modelConfig.dict}`, {\n savePath: absDictionaryPath,\n message: messageCb,\n });\n }\n}\n// 改为init和convert\nasync function convert(input: {\n filePath: string | Uint8Array;\n ocrConfig: { key: string; modelDir: string; device?: 'dml' | 'cuda' | 'cpu' };\n options?: ImageAdjustType;\n}) {\n const inputKey = getUniqueObjectKey(input.ocrConfig);\n if (key !== inputKey) {\n const modelConfig = ModelConfig.find(\n (item) => item.key === input.ocrConfig.key,\n )!;\n // 自动下载模型\n const absDetectionPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.det,\n );\n const absRecognitionPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.rec,\n );\n const absDictionaryPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.dict,\n );\n ocrInstance = Ocr.create({\n onnxOptions: {\n executionProviders: input.ocrConfig.device\n ? [input.ocrConfig.device]\n : ['dml', 'cuda', 'cpu'],\n executionMode: 'parallel',\n },\n models: {\n detectionPath: absDetectionPath,\n recognitionPath: absRecognitionPath,\n dictionaryPath: absDictionaryPath,\n },\n });\n key = inputKey;\n }\n return (await ocrInstance).convert(input.filePath, input.options);\n}\n\nexport { init, convert };\n"],
|
|
5
|
+
"mappings": ";AAAA,SAA0B,KAAK,mBAAmB;AAClD,SAAS,oBAAoB,YAAY;AAGzC,SAAS,0BAA0B;AACnC,SAAS,oBAAoB;AAC7B,IAAI;AACJ,IAAI;AACJ,IAAM,UACJ;AAEF,IAAM,UACJ;AACF,eAAe,KAAK,WAIjB;AACD,QAAM,YAAY,CAAC,YAAiB;AAClC,cAAU,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,EAC3D;AACA,QAAM,cAAc,YAAY,KAAK,CAAC,SAAS,KAAK,QAAQ,UAAU,GAAG;AACzE,QAAM,KAAK,mBAAmB,EAAE,KAAK,UAAU,SAAS,CAAC;AAEzD,QAAM,mBAAmB,KAAK,KAAK,UAAU,UAAU,YAAY,GAAG;AACtE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,GAAG,GAAI;AACvC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,GAAG,IAAI;AAAA,MAClD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACA,QAAM,qBAAqB,KAAK,KAAK,UAAU,UAAU,YAAY,GAAG;AACxE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,GAAG,GAAI;AACvC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,GAAG,IAAI;AAAA,MAClD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACA,QAAM,oBAAoB,KAAK,KAAK,UAAU,UAAU,YAAY,IAAI;AACxE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,IAAI,GAAI;AACxC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,IAAI,IAAI;AAAA,MACnD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,eAAe,QAAQ,OAIpB;AACD,QAAM,WAAW,mBAAmB,MAAM,SAAS;AACnD,MAAI,QAAQ,UAAU;AACpB,UAAM,cAAc,YAAY;AAAA,MAC9B,CAAC,SAAS,KAAK,QAAQ,MAAM,UAAU;AAAA,IACzC;AAEA,UAAM,mBAAmB,KAAK;AAAA,MAC5B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,UAAM,qBAAqB,KAAK;AAAA,MAC9B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,UAAM,oBAAoB,KAAK;AAAA,MAC7B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,kBAAc,IAAI,OAAO;AAAA,MACvB,aAAa;AAAA,QACX,oBAAoB,MAAM,UAAU,SAChC,CAAC,MAAM,UAAU,MAAM,IACvB,CAAC,OAAO,QAAQ,KAAK;AAAA,QACzB,eAAe;AAAA,MACjB;AAAA,MACA,QAAQ;AAAA,QACN,eAAe;AAAA,QACf,iBAAiB;AAAA,QACjB,gBAAgB;AAAA,MAClB;AAAA,IACF,CAAC;AACD,UAAM;AAAA,EACR;AACA,UAAQ,MAAM,aAAa,QAAQ,MAAM,UAAU,MAAM,OAAO;AAClE;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/worker/reranker.mjs
CHANGED
|
@@ -29,6 +29,20 @@ var FileProxyCache = class {
|
|
|
29
29
|
this.#downloadConfig = initOptions.downloadConfig;
|
|
30
30
|
this.#initOptions = initOptions;
|
|
31
31
|
}
|
|
32
|
+
async #createResponse(filePath) {
|
|
33
|
+
const stats = await fs.promises.stat(filePath);
|
|
34
|
+
const extension = filePath.split(".").pop().toLowerCase();
|
|
35
|
+
const contentType = CONTENT_TYPE_MAP[extension] ?? "application/octet-stream";
|
|
36
|
+
const stream = fs.createReadStream(filePath);
|
|
37
|
+
return new Response(stream, {
|
|
38
|
+
status: 200,
|
|
39
|
+
statusText: "OK",
|
|
40
|
+
headers: {
|
|
41
|
+
"content-type": contentType,
|
|
42
|
+
"content-length": stats.size.toString()
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
}
|
|
32
46
|
async match(request) {
|
|
33
47
|
let filePath;
|
|
34
48
|
if (request.startsWith("http")) {
|
|
@@ -54,7 +68,7 @@ var FileProxyCache = class {
|
|
|
54
68
|
}
|
|
55
69
|
const exists = await this.#vfs.exists(filePath);
|
|
56
70
|
if (exists) {
|
|
57
|
-
return
|
|
71
|
+
return this.#createResponse(filePath);
|
|
58
72
|
}
|
|
59
73
|
return void 0;
|
|
60
74
|
}
|
|
@@ -62,7 +76,6 @@ var FileProxyCache = class {
|
|
|
62
76
|
throw new Error("no put");
|
|
63
77
|
}
|
|
64
78
|
};
|
|
65
|
-
var decoder = new TextDecoder("utf-8");
|
|
66
79
|
var CONTENT_TYPE_MAP = {
|
|
67
80
|
txt: "text/plain",
|
|
68
81
|
html: "text/html",
|
|
@@ -74,51 +87,6 @@ var CONTENT_TYPE_MAP = {
|
|
|
74
87
|
jpeg: "image/jpeg",
|
|
75
88
|
gif: "image/gif"
|
|
76
89
|
};
|
|
77
|
-
var FileResponse = class _FileResponse {
|
|
78
|
-
filePath;
|
|
79
|
-
headers;
|
|
80
|
-
exists = true;
|
|
81
|
-
status = 200;
|
|
82
|
-
statusText = "OK";
|
|
83
|
-
body;
|
|
84
|
-
constructor(filePath) {
|
|
85
|
-
this.filePath = filePath;
|
|
86
|
-
this.headers = new Headers();
|
|
87
|
-
this.updateContentType();
|
|
88
|
-
this.body = fs.createReadStream(filePath);
|
|
89
|
-
}
|
|
90
|
-
updateContentType() {
|
|
91
|
-
const stats = fs.statSync(this.filePath);
|
|
92
|
-
this.headers.set("content-length", stats.size.toString());
|
|
93
|
-
const extension = this.filePath.toString().split(".").pop().toLowerCase();
|
|
94
|
-
this.headers.set(
|
|
95
|
-
"content-type",
|
|
96
|
-
CONTENT_TYPE_MAP[extension] ?? "application/octet-stream"
|
|
97
|
-
);
|
|
98
|
-
}
|
|
99
|
-
clone() {
|
|
100
|
-
const response = new _FileResponse(this.filePath);
|
|
101
|
-
response.exists = this.exists;
|
|
102
|
-
response.status = this.status;
|
|
103
|
-
response.statusText = this.statusText;
|
|
104
|
-
response.headers = new Headers(this.headers);
|
|
105
|
-
return response;
|
|
106
|
-
}
|
|
107
|
-
async arrayBuffer() {
|
|
108
|
-
return fs.promises.readFile(this.filePath).then((buffer) => buffer.buffer);
|
|
109
|
-
}
|
|
110
|
-
async blob() {
|
|
111
|
-
return new Blob([await this.arrayBuffer()], {
|
|
112
|
-
type: this.headers.get("content-type")
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
async text() {
|
|
116
|
-
return decoder.decode(await this.arrayBuffer());
|
|
117
|
-
}
|
|
118
|
-
async json() {
|
|
119
|
-
return JSON.parse(await this.text());
|
|
120
|
-
}
|
|
121
|
-
};
|
|
122
90
|
|
|
123
91
|
// packages/worker/set-transformers-config.ts
|
|
124
92
|
function setTransformersConfig(options) {
|
package/worker/reranker.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/reranker/index.ts", "../../packages/worker/set-transformers-config.ts", "../../packages/worker/custom-cache.ts"],
|
|
4
|
-
"sourcesContent": ["import {\n AutoModelForSequenceClassification,\n AutoTokenizer,\n} from '@huggingface/transformers';\nimport type {\n Tensor,\n XLMRobertaModel,\n XLMRobertaTokenizer,\n} from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\n\nclass ReRanderService {\n init = async (options: InitOptions) => {\n if (!this.model || !this.tokenizer) {\n await this.#downloadOnly(options);\n }\n return true;\n };\n convert = async (input: { value: string; docs: string[] }) => {\n const inputs = this.tokenizer!(\n new Array(input.docs.length).fill(input.value),\n {\n text_pair: input.docs,\n padding: true,\n truncation: true,\n },\n );\n const { logits } = await this.model!(inputs);\n return (logits as Tensor)\n .sigmoid()\n .tolist()\n .map(([score], i: number) => ({\n index: i,\n score,\n }))\n .sort((a, b) => b.score - a.score);\n };\n tokenizer?: XLMRobertaTokenizer;\n model?: XLMRobertaModel;\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n this.tokenizer = await AutoTokenizer.from_pretrained(options.modelName);\n this.model = await AutoModelForSequenceClassification.from_pretrained(\n options.modelName,\n {\n ...options.options,\n } as any,\n );\n }\n}\nconst instance = new ReRanderService();\nconst init = instance.init;\nconst convert = instance.convert;\nexport { init, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async match(request: string): Promise<
|
|
5
|
-
"mappings": ";AAAA;AAAA,EACE;AAAA,EACA;AAAA,OACK;;;ACHP,SAAS,OAAAA,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,MAAM,
|
|
4
|
+
"sourcesContent": ["import {\n AutoModelForSequenceClassification,\n AutoTokenizer,\n} from '@huggingface/transformers';\nimport type {\n Tensor,\n XLMRobertaModel,\n XLMRobertaTokenizer,\n} from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\n\nclass ReRanderService {\n init = async (options: InitOptions) => {\n if (!this.model || !this.tokenizer) {\n await this.#downloadOnly(options);\n }\n return true;\n };\n convert = async (input: { value: string; docs: string[] }) => {\n const inputs = this.tokenizer!(\n new Array(input.docs.length).fill(input.value),\n {\n text_pair: input.docs,\n padding: true,\n truncation: true,\n },\n );\n const { logits } = await this.model!(inputs);\n return (logits as Tensor)\n .sigmoid()\n .tolist()\n .map(([score], i: number) => ({\n index: i,\n score,\n }))\n .sort((a, b) => b.score - a.score);\n };\n tokenizer?: XLMRobertaTokenizer;\n model?: XLMRobertaModel;\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n this.tokenizer = await AutoTokenizer.from_pretrained(options.modelName);\n this.model = await AutoModelForSequenceClassification.from_pretrained(\n options.modelName,\n {\n ...options.options,\n } as any,\n );\n }\n}\nconst instance = new ReRanderService();\nconst init = instance.init;\nconst convert = instance.convert;\nexport { init, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async #createResponse(filePath: string): Promise<Response> {\n const stats = await fs.promises.stat(filePath);\n const extension = filePath.split('.').pop()!.toLowerCase();\n const contentType =\n (CONTENT_TYPE_MAP as any)[extension] ?? 'application/octet-stream';\n const stream = fs.createReadStream(filePath);\n return new Response(stream, {\n status: 200,\n statusText: 'OK',\n headers: {\n 'content-type': contentType,\n 'content-length': stats.size.toString(),\n },\n });\n }\n async match(request: string): Promise<Response | undefined> {\n let filePath;\n if (request.startsWith('http')) {\n const data = new URL(request);\n filePath = path.join(\n this.#path,\n data.pathname.replace(\n '/' +\n env.remotePathTemplate\n .replaceAll('{model}', this.#modelName)\n .replaceAll('{revision}', encodeURIComponent('main')),\n `/${this.#modelName}/`,\n ),\n );\n\n await downloadFile(request, {\n ...this.#downloadConfig,\n savePath: filePath,\n message: this.#sendMessage,\n headers: {\n token: this.#initOptions?.hfToken ?? '',\n 'software-bbs': 'bbs.shenghuabi.site',\n },\n });\n } else {\n filePath = request;\n }\n const exists = await this.#vfs.exists(filePath);\n if (exists) {\n return this.#createResponse(filePath);\n }\n return undefined;\n }\n\n async put(request: string, response: Response): Promise<void> {\n throw new Error('no put');\n }\n}\nconst CONTENT_TYPE_MAP: Record<string, string> = {\n txt: 'text/plain',\n html: 'text/html',\n css: 'text/css',\n js: 'text/javascript',\n json: 'application/json',\n png: 'image/png',\n jpg: 'image/jpeg',\n jpeg: 'image/jpeg',\n gif: 'image/gif',\n};\n"],
|
|
5
|
+
"mappings": ";AAAA;AAAA,EACE;AAAA,EACA;AAAA,OACK;;;ACHP,SAAS,OAAAA,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,gBAAgB,UAAqC;AACzD,UAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,QAAQ;AAC7C,UAAM,YAAY,SAAS,MAAM,GAAG,EAAE,IAAI,EAAG,YAAY;AACzD,UAAM,cACH,iBAAyB,SAAS,KAAK;AAC1C,UAAM,SAAS,GAAG,iBAAiB,QAAQ;AAC3C,WAAO,IAAI,SAAS,QAAQ;AAAA,MAC1B,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,kBAAkB,MAAM,KAAK,SAAS;AAAA,MACxC;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACA,MAAM,MAAM,SAAgD;AAC1D,QAAI;AACJ,QAAI,QAAQ,WAAW,MAAM,GAAG;AAC9B,YAAM,OAAO,IAAI,IAAI,OAAO;AAC5B,iBAAW,KAAK;AAAA,QACd,KAAK;AAAA,QACL,KAAK,SAAS;AAAA,UACZ,MACE,IAAI,mBACD,WAAW,WAAW,KAAK,UAAU,EACrC,WAAW,cAAc,mBAAmB,MAAM,CAAC;AAAA,UACxD,IAAI,KAAK,UAAU;AAAA,QACrB;AAAA,MACF;AAEA,YAAM,aAAa,SAAS;AAAA,QAC1B,GAAG,KAAK;AAAA,QACR,UAAU;AAAA,QACV,SAAS,KAAK;AAAA,QACd,SAAS;AAAA,UACP,OAAO,KAAK,cAAc,WAAW;AAAA,UACrC,gBAAgB;AAAA,QAClB;AAAA,MACF,CAAC;AAAA,IACH,OAAO;AACL,iBAAW;AAAA,IACb;AACA,UAAM,SAAS,MAAM,KAAK,KAAK,OAAO,QAAQ;AAC9C,QAAI,QAAQ;AACV,aAAO,KAAK,gBAAgB,QAAQ;AAAA,IACtC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,IAAI,SAAiB,UAAmC;AAC5D,UAAM,IAAI,MAAM,QAAQ;AAAA,EAC1B;AACF;AACA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AACP;;;ADvEO,SAAS,sBAAsB,SAAsB;AAC1D,EAAAC,KAAI,QAAQ;AACZ,EAAAA,KAAI,iBAAiB,QAAQ;AAC7B,EAAAA,KAAI,mBAAmB;AACvB,EAAAA,KAAI,oBAAoB;AACxB,EAAAA,KAAI,WAAW,QAAQ;AACvB,EAAAA,KAAI,cAAc,IAAI,eAAe,OAAO;AAC5C,EAAAA,KAAI,kBAAkB;AACtB,EAAAA,KAAI,aAAa;AACjB,EAAAA,KAAI,iBAAiB;AAErB,EAAAA,KAAI,aAAa,WAAW,QAAQ,UAAU;AAChD;;;ADlBA,IAAM,kBAAN,MAAsB;AAAA,EACpB,OAAO,OAAO,YAAyB;AACrC,QAAI,CAAC,KAAK,SAAS,CAAC,KAAK,WAAW;AAClC,YAAM,KAAK,cAAc,OAAO;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EACA,UAAU,OAAO,UAA6C;AAC5D,UAAM,SAAS,KAAK;AAAA,MAClB,IAAI,MAAM,MAAM,KAAK,MAAM,EAAE,KAAK,MAAM,KAAK;AAAA,MAC7C;AAAA,QACE,WAAW,MAAM;AAAA,QACjB,SAAS;AAAA,QACT,YAAY;AAAA,MACd;AAAA,IACF;AACA,UAAM,EAAE,OAAO,IAAI,MAAM,KAAK,MAAO,MAAM;AAC3C,WAAQ,OACL,QAAQ,EACR,OAAO,EACP,IAAI,CAAC,CAAC,KAAK,GAAG,OAAe;AAAA,MAC5B,OAAO;AAAA,MACP;AAAA,IACF,EAAE,EACD,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EACA,MAAM,cAAc,SAAsB;AACxC,0BAAsB,OAAO;AAE7B,SAAK,YAAY,MAAM,cAAc,gBAAgB,QAAQ,SAAS;AACtE,SAAK,QAAQ,MAAM,mCAAmC;AAAA,MACpD,QAAQ;AAAA,MACR;AAAA,QACE,GAAG,QAAQ;AAAA,MACb;AAAA,IACF;AAAA,EACF;AACF;AACA,IAAM,WAAW,IAAI,gBAAgB;AACrC,IAAM,OAAO,SAAS;AACtB,IAAM,UAAU,SAAS;",
|
|
6
6
|
"names": ["env", "env"]
|
|
7
7
|
}
|
package/worker/text2vec.mjs
CHANGED
|
@@ -26,6 +26,20 @@ var FileProxyCache = class {
|
|
|
26
26
|
this.#downloadConfig = initOptions.downloadConfig;
|
|
27
27
|
this.#initOptions = initOptions;
|
|
28
28
|
}
|
|
29
|
+
async #createResponse(filePath) {
|
|
30
|
+
const stats = await fs.promises.stat(filePath);
|
|
31
|
+
const extension = filePath.split(".").pop().toLowerCase();
|
|
32
|
+
const contentType = CONTENT_TYPE_MAP[extension] ?? "application/octet-stream";
|
|
33
|
+
const stream = fs.createReadStream(filePath);
|
|
34
|
+
return new Response(stream, {
|
|
35
|
+
status: 200,
|
|
36
|
+
statusText: "OK",
|
|
37
|
+
headers: {
|
|
38
|
+
"content-type": contentType,
|
|
39
|
+
"content-length": stats.size.toString()
|
|
40
|
+
}
|
|
41
|
+
});
|
|
42
|
+
}
|
|
29
43
|
async match(request) {
|
|
30
44
|
let filePath;
|
|
31
45
|
if (request.startsWith("http")) {
|
|
@@ -51,7 +65,7 @@ var FileProxyCache = class {
|
|
|
51
65
|
}
|
|
52
66
|
const exists = await this.#vfs.exists(filePath);
|
|
53
67
|
if (exists) {
|
|
54
|
-
return
|
|
68
|
+
return this.#createResponse(filePath);
|
|
55
69
|
}
|
|
56
70
|
return void 0;
|
|
57
71
|
}
|
|
@@ -59,7 +73,6 @@ var FileProxyCache = class {
|
|
|
59
73
|
throw new Error("no put");
|
|
60
74
|
}
|
|
61
75
|
};
|
|
62
|
-
var decoder = new TextDecoder("utf-8");
|
|
63
76
|
var CONTENT_TYPE_MAP = {
|
|
64
77
|
txt: "text/plain",
|
|
65
78
|
html: "text/html",
|
|
@@ -71,51 +84,6 @@ var CONTENT_TYPE_MAP = {
|
|
|
71
84
|
jpeg: "image/jpeg",
|
|
72
85
|
gif: "image/gif"
|
|
73
86
|
};
|
|
74
|
-
var FileResponse = class _FileResponse {
|
|
75
|
-
filePath;
|
|
76
|
-
headers;
|
|
77
|
-
exists = true;
|
|
78
|
-
status = 200;
|
|
79
|
-
statusText = "OK";
|
|
80
|
-
body;
|
|
81
|
-
constructor(filePath) {
|
|
82
|
-
this.filePath = filePath;
|
|
83
|
-
this.headers = new Headers();
|
|
84
|
-
this.updateContentType();
|
|
85
|
-
this.body = fs.createReadStream(filePath);
|
|
86
|
-
}
|
|
87
|
-
updateContentType() {
|
|
88
|
-
const stats = fs.statSync(this.filePath);
|
|
89
|
-
this.headers.set("content-length", stats.size.toString());
|
|
90
|
-
const extension = this.filePath.toString().split(".").pop().toLowerCase();
|
|
91
|
-
this.headers.set(
|
|
92
|
-
"content-type",
|
|
93
|
-
CONTENT_TYPE_MAP[extension] ?? "application/octet-stream"
|
|
94
|
-
);
|
|
95
|
-
}
|
|
96
|
-
clone() {
|
|
97
|
-
const response = new _FileResponse(this.filePath);
|
|
98
|
-
response.exists = this.exists;
|
|
99
|
-
response.status = this.status;
|
|
100
|
-
response.statusText = this.statusText;
|
|
101
|
-
response.headers = new Headers(this.headers);
|
|
102
|
-
return response;
|
|
103
|
-
}
|
|
104
|
-
async arrayBuffer() {
|
|
105
|
-
return fs.promises.readFile(this.filePath).then((buffer) => buffer.buffer);
|
|
106
|
-
}
|
|
107
|
-
async blob() {
|
|
108
|
-
return new Blob([await this.arrayBuffer()], {
|
|
109
|
-
type: this.headers.get("content-type")
|
|
110
|
-
});
|
|
111
|
-
}
|
|
112
|
-
async text() {
|
|
113
|
-
return decoder.decode(await this.arrayBuffer());
|
|
114
|
-
}
|
|
115
|
-
async json() {
|
|
116
|
-
return JSON.parse(await this.text());
|
|
117
|
-
}
|
|
118
|
-
};
|
|
119
87
|
|
|
120
88
|
// packages/worker/set-transformers-config.ts
|
|
121
89
|
function setTransformersConfig(options) {
|
package/worker/text2vec.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/text2vec/index.ts", "../../packages/worker/set-transformers-config.ts", "../../packages/worker/custom-cache.ts"],
|
|
4
|
-
"sourcesContent": ["import { pipeline } from '@huggingface/transformers';\nimport type { FeatureExtractionPipeline } from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\nfunction qwen3ToVec(\n extractor: FeatureExtractionPipeline,\n query: string[],\n description = 'Given a web search query, retrieve relevant passages that answer the query',\n) {\n return extractor!(\n query.map((item) => `Instruct: ${description}\\nQuery:${item}`),\n {\n pooling: 'last_token',\n normalize: true,\n },\n );\n}\n\nclass Text2VecService {\n #extractor!: FeatureExtractionPipeline | undefined;\n constructor() {}\n init = async (options: InitOptions) => {\n if (this.#extractor) {\n return true;\n }\n this.#extractor = await this.#downloadOnly(options);\n return !!this.#extractor;\n };\n convert = async (\n input: {\n value: string | string[];\n mode?: 'qwen3';\n taskDescription?: string;\n } & InitOptions,\n ) => {\n if (!this.#extractor) {\n await this.init(input);\n }\n const inputList =\n typeof input.value === 'string' ? [input.value] : input.value;\n let result;\n if (input.mode === 'qwen3') {\n result = qwen3ToVec(this.#extractor!, inputList, input.taskDescription);\n } else {\n result = this.#extractor!(inputList, {\n pooling: 'mean',\n normalize: true,\n });\n }\n return result.then((result) => {\n const list = result.tolist();\n return typeof input.value === 'string' ? list[0] : list;\n });\n };\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n return await pipeline(\n 'feature-extraction',\n options.modelName,\n options.options,\n );\n }\n getSize = () => (this.#extractor!.model.config as any).hidden_size;\n}\nconst instance = new Text2VecService();\nconst init = instance.init;\nconst getSize = instance.getSize;\nconst convert = instance.convert;\nexport { init, getSize, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async match(request: string): Promise<
|
|
5
|
-
"mappings": ";AAAA,SAAS,YAAAA,iBAAgB;;;ACAzB,SAAS,OAAAC,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,MAAM,
|
|
4
|
+
"sourcesContent": ["import { pipeline } from '@huggingface/transformers';\nimport type { FeatureExtractionPipeline } from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\nfunction qwen3ToVec(\n extractor: FeatureExtractionPipeline,\n query: string[],\n description = 'Given a web search query, retrieve relevant passages that answer the query',\n) {\n return extractor!(\n query.map((item) => `Instruct: ${description}\\nQuery:${item}`),\n {\n pooling: 'last_token',\n normalize: true,\n },\n );\n}\n\nclass Text2VecService {\n #extractor!: FeatureExtractionPipeline | undefined;\n constructor() {}\n init = async (options: InitOptions) => {\n if (this.#extractor) {\n return true;\n }\n this.#extractor = await this.#downloadOnly(options);\n return !!this.#extractor;\n };\n convert = async (\n input: {\n value: string | string[];\n mode?: 'qwen3';\n taskDescription?: string;\n } & InitOptions,\n ) => {\n if (!this.#extractor) {\n await this.init(input);\n }\n const inputList =\n typeof input.value === 'string' ? [input.value] : input.value;\n let result;\n if (input.mode === 'qwen3') {\n result = qwen3ToVec(this.#extractor!, inputList, input.taskDescription);\n } else {\n result = this.#extractor!(inputList, {\n pooling: 'mean',\n normalize: true,\n });\n }\n return result.then((result) => {\n const list = result.tolist();\n return typeof input.value === 'string' ? list[0] : list;\n });\n };\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n return await pipeline(\n 'feature-extraction',\n options.modelName,\n options.options,\n );\n }\n getSize = () => (this.#extractor!.model.config as any).hidden_size;\n}\nconst instance = new Text2VecService();\nconst init = instance.init;\nconst getSize = instance.getSize;\nconst convert = instance.convert;\nexport { init, getSize, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async #createResponse(filePath: string): Promise<Response> {\n const stats = await fs.promises.stat(filePath);\n const extension = filePath.split('.').pop()!.toLowerCase();\n const contentType =\n (CONTENT_TYPE_MAP as any)[extension] ?? 'application/octet-stream';\n const stream = fs.createReadStream(filePath);\n return new Response(stream, {\n status: 200,\n statusText: 'OK',\n headers: {\n 'content-type': contentType,\n 'content-length': stats.size.toString(),\n },\n });\n }\n async match(request: string): Promise<Response | undefined> {\n let filePath;\n if (request.startsWith('http')) {\n const data = new URL(request);\n filePath = path.join(\n this.#path,\n data.pathname.replace(\n '/' +\n env.remotePathTemplate\n .replaceAll('{model}', this.#modelName)\n .replaceAll('{revision}', encodeURIComponent('main')),\n `/${this.#modelName}/`,\n ),\n );\n\n await downloadFile(request, {\n ...this.#downloadConfig,\n savePath: filePath,\n message: this.#sendMessage,\n headers: {\n token: this.#initOptions?.hfToken ?? '',\n 'software-bbs': 'bbs.shenghuabi.site',\n },\n });\n } else {\n filePath = request;\n }\n const exists = await this.#vfs.exists(filePath);\n if (exists) {\n return this.#createResponse(filePath);\n }\n return undefined;\n }\n\n async put(request: string, response: Response): Promise<void> {\n throw new Error('no put');\n }\n}\nconst CONTENT_TYPE_MAP: Record<string, string> = {\n txt: 'text/plain',\n html: 'text/html',\n css: 'text/css',\n js: 'text/javascript',\n json: 'application/json',\n png: 'image/png',\n jpg: 'image/jpeg',\n jpeg: 'image/jpeg',\n gif: 'image/gif',\n};\n"],
|
|
5
|
+
"mappings": ";AAAA,SAAS,YAAAA,iBAAgB;;;ACAzB,SAAS,OAAAC,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,gBAAgB,UAAqC;AACzD,UAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,QAAQ;AAC7C,UAAM,YAAY,SAAS,MAAM,GAAG,EAAE,IAAI,EAAG,YAAY;AACzD,UAAM,cACH,iBAAyB,SAAS,KAAK;AAC1C,UAAM,SAAS,GAAG,iBAAiB,QAAQ;AAC3C,WAAO,IAAI,SAAS,QAAQ;AAAA,MAC1B,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,kBAAkB,MAAM,KAAK,SAAS;AAAA,MACxC;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACA,MAAM,MAAM,SAAgD;AAC1D,QAAI;AACJ,QAAI,QAAQ,WAAW,MAAM,GAAG;AAC9B,YAAM,OAAO,IAAI,IAAI,OAAO;AAC5B,iBAAW,KAAK;AAAA,QACd,KAAK;AAAA,QACL,KAAK,SAAS;AAAA,UACZ,MACE,IAAI,mBACD,WAAW,WAAW,KAAK,UAAU,EACrC,WAAW,cAAc,mBAAmB,MAAM,CAAC;AAAA,UACxD,IAAI,KAAK,UAAU;AAAA,QACrB;AAAA,MACF;AAEA,YAAM,aAAa,SAAS;AAAA,QAC1B,GAAG,KAAK;AAAA,QACR,UAAU;AAAA,QACV,SAAS,KAAK;AAAA,QACd,SAAS;AAAA,UACP,OAAO,KAAK,cAAc,WAAW;AAAA,UACrC,gBAAgB;AAAA,QAClB;AAAA,MACF,CAAC;AAAA,IACH,OAAO;AACL,iBAAW;AAAA,IACb;AACA,UAAM,SAAS,MAAM,KAAK,KAAK,OAAO,QAAQ;AAC9C,QAAI,QAAQ;AACV,aAAO,KAAK,gBAAgB,QAAQ;AAAA,IACtC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,IAAI,SAAiB,UAAmC;AAC5D,UAAM,IAAI,MAAM,QAAQ;AAAA,EAC1B;AACF;AACA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AACP;;;ADvEO,SAAS,sBAAsB,SAAsB;AAC1D,EAAAC,KAAI,QAAQ;AACZ,EAAAA,KAAI,iBAAiB,QAAQ;AAC7B,EAAAA,KAAI,mBAAmB;AACvB,EAAAA,KAAI,oBAAoB;AACxB,EAAAA,KAAI,WAAW,QAAQ;AACvB,EAAAA,KAAI,cAAc,IAAI,eAAe,OAAO;AAC5C,EAAAA,KAAI,kBAAkB;AACtB,EAAAA,KAAI,aAAa;AACjB,EAAAA,KAAI,iBAAiB;AAErB,EAAAA,KAAI,aAAa,WAAW,QAAQ,UAAU;AAChD;;;AD1BA,SAAS,WACP,WACA,OACA,cAAc,8EACd;AACA,SAAO;AAAA,IACL,MAAM,IAAI,CAAC,SAAS,aAAa,WAAW;AAAA,QAAW,IAAI,EAAE;AAAA,IAC7D;AAAA,MACE,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAEA,IAAM,kBAAN,MAAsB;AAAA,EACpB;AAAA,EACA,cAAc;AAAA,EAAC;AAAA,EACf,OAAO,OAAO,YAAyB;AACrC,QAAI,KAAK,YAAY;AACnB,aAAO;AAAA,IACT;AACA,SAAK,aAAa,MAAM,KAAK,cAAc,OAAO;AAClD,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EACA,UAAU,OACR,UAKG;AACH,QAAI,CAAC,KAAK,YAAY;AACpB,YAAM,KAAK,KAAK,KAAK;AAAA,IACvB;AACA,UAAM,YACJ,OAAO,MAAM,UAAU,WAAW,CAAC,MAAM,KAAK,IAAI,MAAM;AAC1D,QAAI;AACJ,QAAI,MAAM,SAAS,SAAS;AAC1B,eAAS,WAAW,KAAK,YAAa,WAAW,MAAM,eAAe;AAAA,IACxE,OAAO;AACL,eAAS,KAAK,WAAY,WAAW;AAAA,QACnC,SAAS;AAAA,QACT,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,WAAO,OAAO,KAAK,CAACC,YAAW;AAC7B,YAAM,OAAOA,QAAO,OAAO;AAC3B,aAAO,OAAO,MAAM,UAAU,WAAW,KAAK,CAAC,IAAI;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EACA,MAAM,cAAc,SAAsB;AACxC,0BAAsB,OAAO;AAE7B,WAAO,MAAMC;AAAA,MACX;AAAA,MACA,QAAQ;AAAA,MACR,QAAQ;AAAA,IACV;AAAA,EACF;AAAA,EACA,UAAU,MAAO,KAAK,WAAY,MAAM,OAAe;AACzD;AACA,IAAM,WAAW,IAAI,gBAAgB;AACrC,IAAM,OAAO,SAAS;AACtB,IAAM,UAAU,SAAS;AACzB,IAAM,UAAU,SAAS;",
|
|
6
6
|
"names": ["pipeline", "env", "env", "result", "pipeline"]
|
|
7
7
|
}
|