@shenghuabi/knowledge 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file-parser.mjs.map +1 -1
- package/image.mjs.map +1 -1
- package/ocr.mjs +52 -52
- package/ocr.mjs.map +1 -1
- package/package.json +4 -4
- package/worker/ocr/index.d.ts +1 -1
- package/worker/ocr.mjs +1 -1
- package/worker/ocr.mjs.map +2 -2
package/file-parser.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../packages/file-parser/file-parser.service.ts", "../packages/file-parser/text-analyse.ts", "../packages/file-parser/text-parser.ts", "../packages/file-parser/const.ts", "../packages/file-parser/document-file-parser.service.ts", "../packages/file-parser/document-loader/xlsx.loader.ts", "../packages/file-parser/document-loader/pdf-img.loader.ts", "../packages/file-parser/dict/dict-format/stardict-parse.service.ts", "../packages/file-parser/dict/dict-format/mdict-parse.service.ts", "../packages/file-parser/dict/dict.service.ts", "../packages/file-parser/dict/dict-format/yaml-parse.service.ts", "../packages/file-parser/dict/dict-format/dsl/dsl-parse.service.ts", "../packages/file-parser/dict/dict-format/dsl/dsl.format.ts", "../packages/file-parser/vl-parser/markdown.parser.ts"],
|
|
4
|
-
"sourcesContent": ["import { fileTypeFromBuffer } from 'file-type';\nimport { inject, RootStaticInjectOptions } from 'static-injector';\nimport { path } from '@cyia/vfs2';\nimport { bufferDecodeToText as textParse } from './text-parser';\nimport { FileParser, FileParserToken } from './const';\nimport {\n DocumentFileParserService,\n FormatedData,\n} from './document-file-parser.service';\n\nexport class FileParserService\n extends RootStaticInjectOptions\n implements FileParser\n{\n priority: number = 0;\n #parserList = inject(FileParserToken, { optional: true })\n ?.slice()\n .sort((a, b) => a.priority - b.priority) ?? [\n inject(DocumentFileParserService),\n ];\n async parse(fileName: string, buffer: Uint8Array | ArrayBuffer) {\n const baseName = path.basename(fileName, path.extname(fileName));\n let type;\n try {\n type = await fileTypeFromBuffer(buffer);\n } catch (error) {\n throw new Error(`文件:[${fileName}]类型解析失败`, {\n cause: error,\n });\n }\n for (const item of this.#parserList) {\n const result = await item.parse(fileName, buffer, type);\n if (result) {\n return result;\n }\n }\n // 尝试以文本形式解析\n return [\n { title: baseName, content: textParse(new Uint8Array(buffer)) },\n ] as FormatedData[];\n }\n\n /** 用于支持工作流读文件 */\n parseOne(fileName: string, buffer: Uint8Array | ArrayBuffer) {\n return this.parse(fileName, buffer).then((list) => {\n return {\n content: list?.map((item) => item.content).join('\\n'),\n parseTo: list[0].parseTo,\n };\n });\n }\n}\n", "import { analyse, Match } from 'chardet';\nfunction resultWeight(item: Match) {\n let offset = 0;\n if (item.lang === 'zh') {\n offset++;\n if (item.name === 'GB18030') {\n offset += 2;\n } else if (item.name === 'Big5') {\n offset++;\n }\n }\n return item.confidence + offset;\n}\n\nexport function textAnalyse(buffer: Uint8Array) {\n return analyse(buffer).sort((a, b) => resultWeight(b) - resultWeight(a))[0]\n .name;\n}\n", "import { textAnalyse } from './text-analyse';\n\nexport function bufferDecodeToText(\n buffer: Uint8Array,\n metadata?: { path?: string },\n) {\n const type = textAnalyse(buffer);\n const decoder = new TextDecoder(type, { fatal: true });\n try {\n return decoder.decode(buffer);\n } catch (error) {\n throw new Error(`尝试使用 ${type} 编码解析失败;${metadata?.path ?? ''}`);\n }\n}\n", "import { FileTypeResult } from 'file-type';\nimport { InjectionToken, Signal } from 'static-injector';\nimport { FormatedData } from './document-file-parser.service';\n\nexport interface FileParser {\n priority: number;\n parse: (\n fileName: string,\n buffer: Uint8Array | ArrayBuffer,\n type: FileTypeResult | undefined,\n ) => Promise<FormatedData[] | undefined>;\n}\nexport const FileParserToken = new InjectionToken<FileParser[]>('FileParser');\nexport const ImageParserToken = new InjectionToken<\n (\n assetPath: string,\n prefix: string,\n buffer: Buffer<ArrayBufferLike>,\n ) => Promise<{\n content: string;\n parseTo: string;\n // assets: any[];\n }>\n>('ImageParserToken');\nexport const DocumentParserConfigToken = new InjectionToken<\n Signal<{\n pdfAsImage?: {\n enable?: boolean;\n viewPortOptions?: {\n scale?: number;\n };\n };\n }>\n>('DocumentParserConfigToken');\n", "import { FileTypeResult } from 'file-type';\nimport { inject, Injector, RootStaticInjectOptions } from 'static-injector';\nimport { EPubLoader } from '@langchain/community/document_loaders/fs/epub';\nimport { DocxLoader } from '@langchain/community/document_loaders/fs/docx';\nimport { CSVLoader } from '@langchain/community/document_loaders/fs/csv';\nimport { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';\nimport { PPTXLoader } from '@langchain/community/document_loaders/fs/pptx';\nimport { SRTLoader } from '@langchain/community/document_loaders/fs/srt';\nimport type { Document } from '@langchain/core/documents';\nimport { Blob } from 'node:buffer';\nimport { path } from '@cyia/vfs2';\nimport { xlsxLoader } from './document-loader/xlsx.loader';\nimport { DocumentParserConfigToken } from './const';\nimport { pdfImageLoader } from './document-loader/pdf-img.loader';\nexport type ParsedData = Document<Record<string, any>> & { parseTo?: string };\nexport type FormatedData = {\n title: any;\n content: string;\n parseTo?: string;\n}\nexport class DocumentFileParserService extends RootStaticInjectOptions {\n #documentParser = inject(DocumentParserConfigToken, { optional: true });\n #injector = inject(Injector);\n\n async parse(\n filePath: string,\n buffer: Uint8Array | ArrayBuffer,\n type: FileTypeResult | undefined,\n ): Promise<FormatedData[] | undefined> {\n try {\n const baseName = path.basename(filePath, path.extname(filePath));\n const blob = new Blob([buffer]);\n if (filePath.endsWith('.srt')) {\n const instance = new SRTLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (\n type?.ext === 'pptx' ||\n type?.ext === 'odt' ||\n type?.ext === 'odp' ||\n type?.ext === 'ods'\n ) {\n const instance = new PPTXLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'pdf') {\n let result;\n const useImage = this.#documentParser?.().pdfAsImage?.enable;\n if (useImage) {\n result = await pdfImageLoader(buffer, filePath, this.#injector);\n } else {\n const instance = new PDFLoader(blob);\n result = await instance.load();\n }\n return this.#formatResult(result, baseName);\n } else if (filePath.endsWith('.csv')) {\n const instance = new CSVLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'docx') {\n const instance = new DocxLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'xlsx') {\n const result = await xlsxLoader(buffer);\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'epub') {\n const instance = new EPubLoader(filePath);\n const result = await instance.load();\n return result\n .filter((item) => !!item.pageContent)\n .map((item, i) => ({\n title: item.metadata['chapter'] || `[未命名]${i + 1}`,\n content: item.pageContent,\n }));\n } else {\n return;\n }\n } catch (error) {\n throw new Error(\n `文件:[${filePath}]解析失败;类型[${JSON.stringify(type) ?? ''}]`,\n {\n cause: error,\n },\n );\n }\n }\n\n #formatResult(list: ParsedData[], title: string) {\n if (list.length === 1) {\n return [\n {\n title: list[0].metadata?.['title'] ?? title,\n content: list[0].pageContent.trim(),\n parseTo: list[0].parseTo,\n },\n ];\n }\n return list\n .map((item) => ({ ...item, pageContent: item.pageContent?.trim() }))\n .filter((item) => !!item.pageContent)\n .map((item, index) => ({\n title: item.metadata?.['title'] ?? `${title}-${index}`,\n content: item.pageContent,\n parseTo: item.parseTo,\n }));\n }\n}\n", "export async function xlsxLoader(buffer: Uint8Array | ArrayBuffer) {\n const { read, utils } = await import('xlsx');\n const workbook = read(buffer);\n return workbook.SheetNames.map((name) => {\n const worksheet = workbook.Sheets[name];\n const csv = utils.sheet_to_csv(worksheet);\n return { pageContent: csv, metadata: { title: name } };\n });\n}\n", "import { Injector } from 'static-injector';\r\nimport { DocumentParserConfigToken, ImageParserToken } from '../const';\r\nimport { path } from '@cyia/vfs2';\r\nexport async function pdfImageLoader(\r\n buffer: Uint8Array | ArrayBuffer,\r\n filePath: string,\r\n injector: Injector,\r\n) {\r\n let canvas = await import('@napi-rs/canvas');\r\n if (!(globalThis as any).DOMMatrix) {\r\n (globalThis as any).DOMMatrix = canvas.DOMMatrix;\r\n }\r\n if (!(globalThis as any).ImageData) {\r\n (globalThis as any).ImageData = canvas.ImageData;\r\n }\r\n if (!(globalThis as any).Path2D) {\r\n (globalThis as any).Path2D = canvas.Path2D;\r\n }\r\n const { getDocument } = await import('pdfjs-dist');\r\n let pdf = await getDocument(new Uint8Array(buffer)).promise;\r\n let list = [];\r\n let metadata = await pdf.getMetadata();\r\n const fileName = path.basename(filePath, path.extname(filePath));\r\n let title = (metadata.info as any)['Title'] || fileName;\r\n let imageParser = injector.get(ImageParserToken);\r\n let documentParserConfig = injector.get(DocumentParserConfigToken);\r\n for (let i = 1; i <= pdf.numPages; i++) {\r\n const page = await pdf.getPage(i);\r\n const viewport = page.getViewport({\r\n scale: documentParserConfig().pdfAsImage?.viewPortOptions?.scale ?? 1,\r\n });\r\n const canvasEl = canvas.createCanvas(viewport.width, viewport.height);\r\n const ctx = canvasEl.getContext('2d');\r\n await page.render({\r\n canvasContext: ctx! as any,\r\n viewport,\r\n canvas: canvasEl as any,\r\n }).promise;\r\n const image = canvasEl.toBuffer('image/png');\r\n let result = await imageParser(filePath, `${fileName}-${i}`, image);\r\n // 图片接入工作流\r\n list.push({\r\n pageContent: result.content,\r\n metadata: { title: title },\r\n parseTo: result.parseTo,\r\n });\r\n }\r\n\r\n return list;\r\n}\r\n", "import { AbstractDictParse } from '../type';\nimport * as fs from 'fs/promises';\nimport * as zlib from 'zlib';\nimport * as util from 'util';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport decompress from 'decompress';\n\nimport decompressTarbz from '@xhmikosr/decompress-tarbz2';\nimport { tmpdir } from 'os';\nimport { v4 } from 'uuid';\nimport { path } from '@cyia/vfs2';\nimport { decode } from 'html-entities';\n\nconst decoder = new util.TextDecoder('utf-8');\nexport class StardictParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(filePathList: string) {\n const filePath = filePathList;\n let infoFileContent!: string;\n let indexFileBuffer!: Buffer;\n let dictFileBuffer!: Buffer;\n if (filePath.endsWith('tar.bz2')) {\n const dir = path.join(tmpdir(), v4());\n const list = await decompress(filePath, dir, {\n plugins: [decompressTarbz()],\n });\n for (const item of list) {\n if (item.path.endsWith('ifo')) {\n infoFileContent = item.data.toString();\n } else if (item.path.endsWith('idx')) {\n indexFileBuffer = item.data;\n } else if (item.path.endsWith('dict.dz')) {\n dictFileBuffer = item.data;\n }\n }\n if (!infoFileContent || !indexFileBuffer || !dictFileBuffer) {\n throw new Error(`解压后未找到指定内容,文件夹:${dir}`);\n }\n } else {\n const fileName = path.basename(filePath).replace(/\\.(ifo|tar\\.bz)$/, '');\n\n infoFileContent = (\n await fs.readFile(\n path.resolve(path.dirname(filePath), `${fileName}.ifo`),\n )\n ).toString();\n indexFileBuffer = await fs.readFile(\n path.resolve(path.dirname(filePath), `${fileName}.idx`),\n );\n dictFileBuffer = await fs.readFile(\n path.resolve(path.dirname(filePath), `${fileName}.dict.dz`),\n );\n }\n\n // let infoPath=fs.readFile()\n // 读取导入文件/文件夹\n //解析索引\n // 解压缩内容\n // 读取内容,并且进行一些格式化\n // 将列表返回(统一插入到数据库)\n const [info, list] = await Promise.all([\n this.#getInfo(infoFileContent).then((obj) => {\n obj['name'] ??= obj['bookname'];\n return obj;\n }),\n this.#getIndex(indexFileBuffer),\n ]);\n return {\n info: info as any,\n dataListGenerator: () => this.#getDict(dictFileBuffer, list),\n };\n }\n\n async #getInfo(content: string) {\n const list = content\n .split(/\\r\\n|\\n\\r|\\n|\\r/)\n .filter(Boolean)\n .map((item) => item.split('=').filter(Boolean))\n .filter((list) => list.length === 2);\n return list.reduce(\n (obj, item) => {\n obj[item[0]] = item[1];\n return obj;\n },\n {} as Record<string, string>,\n );\n }\n async #getIndex(buffer: Buffer) {\n const indexData: [string, number, number][] = [];\n let index = 0;\n while (index < buffer.length) {\n const beg = index;\n index = buffer.indexOf('\\x00', beg);\n let word = buffer.toString('utf-8', beg, index);\n if (word.includes('&#')) {\n word = decode(word);\n }\n index++;\n const offset = buffer.readUInt32BE(index);\n index += 4;\n const size = buffer.readUInt32BE(index);\n index += 4;\n indexData.push([word, offset, size]);\n }\n return indexData;\n }\n async *#getDict(\n buffer: Buffer,\n indexData: readonly (readonly [string, number, number])[],\n ) {\n const rawdata = new Uint8Array(buffer);\n const buffer_1 = zlib.gunzipSync(rawdata);\n const rawdata_1 = new Uint8Array(buffer_1);\n for (const [word, offset, size] of indexData) {\n const chunk = rawdata_1.slice(offset, offset + size);\n const decoded = (decoder.decode(chunk) ?? '').trim();\n if (!decoded) {\n continue;\n }\n yield {\n word,\n content: decoded,\n };\n }\n }\n}\n", "import { AbstractDictParse } from '../type';\nimport { Mdict, PassCode } from '@cyia/mdict-reader';\nimport { existsSync } from 'fs';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport { path, createNormalizeVfs } from '@cyia/vfs2';\nexport class MdictParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(filePathList: string, options?: PassCode) {\n let passCode: PassCode | undefined;\n if (options?.regCode && options.userId) {\n passCode = options;\n }\n const filePath = filePathList;\n const fileName = path.basename(filePath, path.extname(filePath));\n const mdxFilePath = path.resolve(path.dirname(filePath), `${fileName}.mdx`);\n const mdxInstance = await Mdict.build(mdxFilePath, passCode);\n const mddFilePath = path.resolve(path.dirname(filePath), `${fileName}.mdd`);\n const dataInfo = mdxInstance.getDictInfo().mdx;\n return {\n // todo 其实还可以保存更多信息\n info: {\n name:\n dataInfo.Title === 'Title (No HTML code allowed)'\n ? fileName\n : dataInfo.Title,\n fileName: fileName,\n },\n dataListGenerator: () => this.wordListGenerator(mdxInstance),\n afterSave: async (assetFolder: string) => {\n // value$$.next({ message: `准备写入资源文件` });\n\n let pendList: Promise<{ name: string; message: string } | undefined>[] =\n [];\n let sum = 0;\n const waitingWrite = async () => {\n const result = (await Promise.all(pendList)).filter(Boolean);\n if (result.length) {\n throw new Error(\n result\n .map((item) => `文件[${item!.name}]写入失败,${item?.message}`)\n .join('\\n'),\n );\n } else {\n sum += pendList.length;\n // value$$.next({ message: `已写入${sum}个文件` });\n }\n pendList = [];\n };\n if (existsSync(mddFilePath)) {\n const vfs = createNormalizeVfs({ dir: assetFolder });\n const mddInstance = await Mdict.build(mddFilePath, passCode);\n const wordGenerator = mddInstance.load();\n for await (const item of wordGenerator) {\n pendList.push(\n mddInstance.getMddAsset(item).then((buffer) =>\n vfs\n .writeFile(path.join(assetFolder, item.word), buffer)\n .then(() => undefined)\n .catch((reason: any) => ({\n name: item.word,\n message: reason,\n })),\n ),\n );\n\n if (pendList.length >= 20) {\n await waitingWrite();\n }\n }\n if (pendList.length) {\n await waitingWrite();\n }\n // value$$.next({ message: '资源写入完成' });\n }\n },\n };\n }\n\n async *wordListGenerator(mdxInstance: Mdict) {\n const list = mdxInstance.load();\n for await (const item of list) {\n const definition = ((await mdxInstance.getDefinition(item)) || '').trim();\n if (!definition) {\n continue;\n }\n yield {\n content: definition,\n word: item.word,\n };\n }\n }\n}\n", "import { StardictParseService } from './dict-format/stardict-parse.service';\n\nimport { DictInput, DictParseResult } from './type';\nimport { MdictParseService } from './dict-format/mdict-parse.service';\n\nimport { inject, Injector, RootStaticInjectOptions } from 'static-injector';\n\nimport fs from 'fs';\n\nimport { path } from '@cyia/vfs2';\nimport { YamlDictParseService } from './dict-format/yaml-parse.service';\nimport { LRUCache } from 'lru-cache';\nimport { DslParseService } from './dict-format/dsl/dsl-parse.service';\n\nexport class DictService extends RootStaticInjectOptions {\n #injector = inject(Injector);\n #cache = new LRUCache<string, DictParseResult>({\n max: 2,\n ttl: 120_000,\n });\n async getDictName(input: DictInput) {\n const dict = await this.#getDictResolve(input);\n return dict.info.name || dict.info.fileName!;\n }\n async #getDictResolve(input: DictInput) {\n if (this.#cache.has(input.filePath)) {\n return this.#cache.get(input.filePath)!;\n }\n const { filePath, type } = input;\n let result!: DictParseResult;\n if (type === 'stardict') {\n result = await this.#injector.get(StardictParseService).parse(filePath);\n } else if (type === 'mdict') {\n result = await this.#injector\n .get(MdictParseService)\n .parse(filePath, input as any);\n } else if (type === 'dsl') {\n result = await this.#injector.get(DslParseService).parse(filePath);\n } else if (type === 'yaml') {\n result = await this.#injector.get(YamlDictParseService).parse(filePath);\n } else {\n throw new Error(`没有找到${type}字典对应解析器`);\n }\n this.#cache.set(input.filePath, result);\n return result;\n }\n /**\n * 第一个需要修改为3个操作\n */\n // 这里顺序反了,应该调用base,让base分配\n async importDict(name: string, dir: string, input: DictInput) {\n const baseName = path.basename(\n input!.filePath,\n path.extname(input!.filePath),\n );\n /** 知识库保存的名字 */\n const result = await this.#getDictResolve(input);\n\n name ||= result.info.name || baseName;\n\n /** 知识库文件夹 */\n\n const assetFolder = path.join(dir, 'assets');\n // 先导入资源,然后再ocr\n if (result.afterSave) {\n await fs.promises.mkdir(assetFolder, {\n recursive: true,\n });\n // 如果要保存,那么文件名应该一定存在,否则就没法写入了\n await result.afterSave(assetFolder!);\n }\n // 准备导入\n return result.dataListGenerator();\n }\n}\n", "import { path } from '@cyia/vfs2';\nimport { AbstractDictParse } from '../type';\nimport * as fs from 'fs/promises';\nimport { parse } from 'yaml';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport * as v from 'valibot';\nexport const YamlDefine = v.object({\n list: v.array(\n v.object({\n word: v.string(),\n content: v.string(),\n extra: v.optional(v.record(v.string(), v.any())),\n }),\n ),\n});\nexport class YamlDictParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(filePathList: string) {\n const filePath = filePathList;\n const ext = path.extname(filePath);\n const content = await fs.readFile(filePath, { encoding: 'utf-8' });\n const data = v.parse(YamlDefine, parse(content));\n return {\n info: {\n fileName: path.basename(filePath),\n name: path.basename(filePath, ext),\n },\n dataListGenerator: async function* () {\n for (const item of data.list) {\n yield item;\n }\n },\n };\n }\n}\n", "import fs from 'fs';\nimport chardet from 'chardet';\nimport { dslFormat } from './dsl.format';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport { AbstractDictParse, DictParseResult } from '../../type';\nimport { PassCode } from '@cyia/mdict-reader';\nimport { basename } from 'path';\nexport class DslParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(\n filePath: string,\n options?: Partial<PassCode>,\n ): Promise<DictParseResult> {\n const instance = new DslParse(filePath);\n await instance.init();\n return {\n info: {\n ...instance.info,\n name: instance.info['NAME'],\n fileName: basename(filePath, '.dsl'),\n },\n dataListGenerator: () => instance.generate(),\n };\n }\n}\nclass DslParse {\n filePath;\n fileContent!: string;\n start = 0;\n\n info = {} as Record<string, any>;\n wordStart!: number;\n constructor(filePath: string) {\n this.filePath = filePath;\n }\n\n async init() {\n const buffer = await fs.promises.readFile(this.filePath);\n const subBuf = new Uint8Array(buffer.subarray(0, 500));\n const a = chardet.detect(subBuf);\n this.fileContent = buffer.toString((a as any) ?? 'UTF-16LE').trimStart();\n while (this.readMetadata()) {}\n this.readEntry();\n }\n\n readMetadata() {\n const regexp =\n /^#(NAME|INDEX_LANGUAGE|CONTENTS_LANGUAGE|SOUND_DICTIONARY|SOURCE_CODE_PAGE)\\s+(.+)/dgm;\n regexp.lastIndex = this.start;\n const result = regexp.exec(this.fileContent.trimStart());\n if (result) {\n try {\n this.info[result[1]] = JSON.parse(result[2]);\n } catch (error) {\n this.info[result[1]] = result[2];\n }\n this.start = result.indices![0][1];\n return true;\n }\n return false;\n }\n lastWordInfo?: { word: string; range: [number, number] };\n async *generate() {\n let result;\n while ((result = this.readEntry())) {\n yield result;\n }\n }\n readEntry() {\n const regexp = /^[^\\s]+/dgm;\n regexp.lastIndex = this.start;\n const result = regexp.exec(this.fileContent);\n\n if (result) {\n this.start = result.indices![0][1];\n const lastWordInfo = this.lastWordInfo;\n this.lastWordInfo = {\n word: result[0],\n range: result.indices![0],\n };\n if (lastWordInfo) {\n const content = this.fileContent\n .slice(lastWordInfo.range[1], this.lastWordInfo!.range[0])\n .replace(/^\\s+/gm, '');\n return {\n word: lastWordInfo.word,\n content: content,\n htmlContent: dslFormat(content),\n };\n }\n } else {\n if (this.lastWordInfo) {\n const content = this.fileContent\n .slice(this.lastWordInfo.range[1])\n .replace(/^\\s+/gm, '');\n const result = {\n word: this.lastWordInfo.word,\n content: content,\n htmlContent: dslFormat(content),\n };\n this.lastWordInfo = undefined;\n return result;\n }\n }\n return undefined;\n }\n}\n", "const blockStatusList = [\n '*',\n 'm',\n 'trn',\n 'ex',\n 'com',\n 's',\n 'url',\n '!trs',\n 'p',\n \"'\",\n 'lang',\n 'ref',\n 'sub',\n 'sup',\n];\nconst inlineStatusList = ['b', 'i', 'u', 'c'];\nconst sImageList = ['bmp', 'pcx', 'dcx', 'jpg', 'tif'];\nconst sSoundList = ['wav'];\nconst sVideoList = ['avi', 'webm', 'mp4'];\nclass NodeItem {\n start!: number;\n // attr: Record<string, any> = {};\n type!: 'inline' | 'block' | 'text';\n}\nclass InlineNode extends NodeItem {\n override type = 'inline' as const;\n attr;\n tag;\n constructor(tag: string, attr: Record<string, any>) {\n super();\n this.attr = attr;\n this.tag = tag;\n }\n}\nclass BlockNode extends NodeItem {\n override type = 'block' as const;\n fn;\n tag;\n constructor(tag: string, fn: (text: string) => string) {\n super();\n this.tag = tag;\n this.fn = fn;\n }\n}\nclass TextNode extends NodeItem {\n override type = 'text' as const;\n text: string;\n style: Record<string, any> = {};\n constructor(text: string) {\n super();\n this.text = text;\n }\n renderedText!: string;\n renderText() {\n const styleAttr = Object.entries(this.style)\n .map((item) => `${item[0]}: ${item[1]}`)\n .join(';');\n const formatedText = this.text.replace(/\\\\\\[/g, '[').replace(/\\\\]/g, ']');\n if (styleAttr.trim().length) {\n this.renderedText = `<span style=\"${styleAttr}\">${formatedText}</span>`;\n } else {\n this.renderedText = `${formatedText}`;\n }\n }\n}\nclass DslFormat {\n #inlineStatus: InlineNode[] = [];\n #blockStatus: BlockNode[] = [];\n #textNodeList: TextNode[] = [];\n #input;\n #start = 0;\n constructor(input: string) {\n this.#input = input;\n }\n run() {\n if (!this.#input) {\n return this.#input;\n }\n this.#scan();\n return this.#textNodeList.map((item) => item.renderedText).join('');\n }\n #scan() {\n let startResult: RegExpExecArray | null;\n let endResult: RegExpExecArray | null;\n const startRegexp =\n /(?<!\\\\)\\[(?!\\/)(?<tagName>m(?<leftPadding>[0-9])|[^\\]\\s]+)((\\s+)(?<attr>[^\\]]+))?]/dg;\n const endRegexp = /(?<!\\\\)\\[\\/(?<tagName>[^\\]]+)]/dg;\n\n while (true) {\n startRegexp.lastIndex = this.#start;\n endRegexp.lastIndex = this.#start;\n const currentText = this.#input;\n startResult = startRegexp.exec(currentText);\n endResult = endRegexp.exec(currentText);\n\n if (\n startResult &&\n (!endResult || startResult.indices![0][0] < endResult.indices![0][0])\n ) {\n // 匹配开始\n const tagName = startResult.groups!['tagName'];\n const matchStart = startResult.indices![0][0];\n this.#createTextNode(this.#start, matchStart);\n\n this.#createStatusNode(\n tagName.startsWith('m') ? 'm' : tagName,\n matchStart,\n startResult.groups!['leftPadding'] || startResult.groups!['attr'],\n );\n this.#start = startResult.indices![0][1];\n } else {\n if (endResult) {\n const tagName = endResult.groups!['tagName'];\n const matchStart = endResult.indices![0][0];\n this.#createTextNode(this.#start, matchStart);\n\n this.#mergeTextNode(tagName, matchStart);\n this.#removeStatus(tagName);\n this.#start = endResult.indices![0][1];\n } else {\n break;\n }\n }\n }\n }\n /** 普通的文本节点创建 */\n #createTextNode(start: number, end: number) {\n if (start < end) {\n const node = new TextNode(this.#input.slice(start, end));\n node.start = start;\n node.style = this.#inlineStatus.reduce(\n (obj, item) => ({ ...obj, ...item.attr }),\n {} as Record<string, any>,\n );\n node.renderText();\n this.#textNodeList.push(node);\n\n return node;\n }\n }\n #createStatusNode(name: string, start: number, attrStr: string) {\n if (inlineStatusList.includes(name)) {\n attrStr;\n let attr = {} as Record<string, any>;\n switch (name) {\n case 'c':\n if (attrStr) {\n attr = { color: attrStr };\n }\n break;\n case 'b':\n attr = { 'font-weight': 'bolder' };\n break;\n case 'u':\n attr = { 'text-decoration': '#f00 wavy underline' };\n break;\n case 'i':\n attr = { 'font-style': 'italic' };\n break;\n }\n\n const node = new InlineNode(name, attr);\n node.start = start;\n this.#inlineStatus.unshift(node);\n } else {\n const node = new BlockNode(name, (text) => {\n switch (name) {\n case 'm': {\n return `<div style=\"padding-left: ${attrStr}em;\">${text}</div>`;\n }\n case 'url': {\n return `<a href=\"${text}\">${text}</a>`;\n }\n case 'ref': {\n return `<a href=\"entry://${text}\">${text}</a>`;\n }\n case 'sub':\n case 'sup': {\n {\n return `<${name}>${text}</${name}>`;\n }\n }\n case 's': {\n if (sImageList.some((item) => text.endsWith(item))) {\n const baseName = text.slice(0, text.lastIndexOf('.'));\n return `<picture><source srcset=\"${text}\"><source srcset=\"${baseName}.webp\"><source srcset=\"${baseName}.jpg\"><img src=\"${text}\"></picture>`;\n } else if (sSoundList.some((item) => text.endsWith(item))) {\n return `<figure><audio controls src=\"${text}\"></audio></figure>`;\n } else if (sVideoList.some((item) => text.endsWith(item))) {\n const baseName = text.slice(0, text.lastIndexOf('.'));\n return `<video controls style=\"width:100%\"><source src=\"${text}\"/><source src=\"${baseName}.mp4\"/><source src=\"${baseName}.webm\"/></video>`;\n }\n return ``;\n }\n default: {\n return `<span>${text}</span>`;\n }\n }\n });\n node.start = start;\n this.#blockStatus.unshift(node);\n }\n }\n #mergeTextNode(name: string, end: number) {\n const blockStart = this.#blockStatus.find((item) => item.tag === name);\n if (blockStart) {\n const start = blockStart.start;\n let tempIndex = -1;\n\n // let i = this.textNodeList.length - 1;\n for (let i = this.#textNodeList.length - 1; i > -1; i--) {\n const textNode = this.#textNodeList[i];\n if (textNode.start < start) {\n break;\n } else {\n tempIndex = i;\n }\n }\n if (tempIndex !== -1) {\n const delNodeList = this.#textNodeList.slice(tempIndex);\n\n const mergeText = blockStart.fn(\n delNodeList.map((item) => item.renderedText).join(''),\n );\n\n this.#textNodeList = this.#textNodeList.slice(0, tempIndex);\n const node = new TextNode(mergeText);\n node.start = start;\n node.renderedText = node.text;\n this.#textNodeList.push(node);\n }\n }\n }\n #removeStatus(name: string) {\n if (inlineStatusList.includes(name)) {\n const index = this.#inlineStatus.findIndex((item) => item.tag === name);\n this.#inlineStatus.splice(index, 1);\n } else {\n const index = this.#blockStatus.findIndex((item) => item.tag === name);\n this.#blockStatus.splice(index, 1);\n }\n }\n}\n\nexport function dslFormat(input: string) {\n const item = new DslFormat(input);\n return item.run();\n}\n", "import { lexer } from 'marked';\r\nimport MS from 'magic-string';\r\ntype Position = [number, number, number, number];\r\nconst regex =\r\n /<!--\\s*(Image|Table)\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)\\s*-->/dg;\r\n\r\n//帮我实现一个js正则表达式,能够匹配文档中`<!-- Image (142, 98, 837, 856) -->`这段注释\r\nexport async function vlMarkdownParser(\r\n content: string,\r\n options: {\r\n imageGet: (\r\n type: string,\r\n position: Position,\r\n ) => Promise<{ src: string; title: string }>;\r\n },\r\n) {\r\n let mdContent = getMdLexer(content);\r\n let result = mdContent.matchAll(regex);\r\n let ms = new MS(mdContent);\r\n for (const item of result) {\r\n // qwen3 vl\r\n let imageData = await options.imageGet('qwen3-vl', [\r\n +item[2],\r\n +item[3],\r\n +item[4],\r\n +item[5],\r\n ]);\r\n ms.update(\r\n item.index,\r\n item.index + item[0].length,\r\n ``,\r\n );\r\n }\r\n return ms.toString();\r\n}\r\nfunction getMdLexer(content: string) {\r\n const list = lexer(content);\r\n if (\r\n list.length === 1 &&\r\n list[0].type === 'code' &&\r\n (list[0].lang === 'markdown' || !list[0].lang)\r\n ) {\r\n return list[0].text as string;\r\n }\r\n return content;\r\n}\r\n"],
|
|
4
|
+
"sourcesContent": ["import { fileTypeFromBuffer } from 'file-type';\nimport { inject, RootStaticInjectOptions } from 'static-injector';\nimport { path } from '@cyia/vfs2';\nimport { bufferDecodeToText as textParse } from './text-parser';\nimport { FileParser, FileParserToken } from './const';\nimport {\n DocumentFileParserService,\n FormatedData,\n} from './document-file-parser.service';\n\nexport class FileParserService\n extends RootStaticInjectOptions\n implements FileParser\n{\n priority: number = 0;\n #parserList = inject(FileParserToken, { optional: true })\n ?.slice()\n .sort((a, b) => a.priority - b.priority) ?? [\n inject(DocumentFileParserService),\n ];\n async parse(fileName: string, buffer: Uint8Array | ArrayBuffer) {\n const baseName = path.basename(fileName, path.extname(fileName));\n let type;\n try {\n type = await fileTypeFromBuffer(buffer);\n } catch (error) {\n throw new Error(`文件:[${fileName}]类型解析失败`, {\n cause: error,\n });\n }\n for (const item of this.#parserList) {\n const result = await item.parse(fileName, buffer, type);\n if (result) {\n return result;\n }\n }\n // 尝试以文本形式解析\n return [\n { title: baseName, content: textParse(new Uint8Array(buffer)) },\n ] as FormatedData[];\n }\n\n /** 用于支持工作流读文件 */\n parseOne(fileName: string, buffer: Uint8Array | ArrayBuffer) {\n return this.parse(fileName, buffer).then((list) => {\n return {\n content: list?.map((item) => item.content).join('\\n'),\n parseTo: list[0].parseTo,\n };\n });\n }\n}\n", "import { analyse, Match } from 'chardet';\nfunction resultWeight(item: Match) {\n let offset = 0;\n if (item.lang === 'zh') {\n offset++;\n if (item.name === 'GB18030') {\n offset += 2;\n } else if (item.name === 'Big5') {\n offset++;\n }\n }\n return item.confidence + offset;\n}\n\nexport function textAnalyse(buffer: Uint8Array) {\n return analyse(buffer).sort((a, b) => resultWeight(b) - resultWeight(a))[0]\n .name;\n}\n", "import { textAnalyse } from './text-analyse';\n\nexport function bufferDecodeToText(\n buffer: Uint8Array,\n metadata?: { path?: string },\n) {\n const type = textAnalyse(buffer);\n const decoder = new TextDecoder(type, { fatal: true });\n try {\n return decoder.decode(buffer);\n } catch (error) {\n throw new Error(`尝试使用 ${type} 编码解析失败;${metadata?.path ?? ''}`);\n }\n}\n", "import { FileTypeResult } from 'file-type';\nimport { InjectionToken, Signal } from 'static-injector';\nimport { FormatedData } from './document-file-parser.service';\n\nexport interface FileParser {\n priority: number;\n parse: (\n fileName: string,\n buffer: Uint8Array | ArrayBuffer,\n type: FileTypeResult | undefined,\n ) => Promise<FormatedData[] | undefined>;\n}\nexport const FileParserToken = new InjectionToken<FileParser[]>('FileParser');\nexport const ImageParserToken = new InjectionToken<\n (\n assetPath: string,\n prefix: string,\n buffer: Buffer<ArrayBufferLike>,\n ) => Promise<{\n content: string;\n parseTo: string;\n // assets: any[];\n }>\n>('ImageParserToken');\nexport const DocumentParserConfigToken = new InjectionToken<\n Signal<{\n pdfAsImage?: {\n enable?: boolean;\n viewPortOptions?: {\n scale?: number;\n };\n };\n }>\n>('DocumentParserConfigToken');\n", "import { FileTypeResult } from 'file-type';\nimport { inject, Injector, RootStaticInjectOptions } from 'static-injector';\nimport { EPubLoader } from '@langchain/community/document_loaders/fs/epub';\nimport { DocxLoader } from '@langchain/community/document_loaders/fs/docx';\nimport { CSVLoader } from '@langchain/community/document_loaders/fs/csv';\nimport { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';\nimport { PPTXLoader } from '@langchain/community/document_loaders/fs/pptx';\nimport { SRTLoader } from '@langchain/community/document_loaders/fs/srt';\nimport type { Document } from '@langchain/core/documents';\nimport { Blob } from 'node:buffer';\nimport { path } from '@cyia/vfs2';\nimport { xlsxLoader } from './document-loader/xlsx.loader';\nimport { DocumentParserConfigToken } from './const';\nimport { pdfImageLoader } from './document-loader/pdf-img.loader';\nexport type ParsedData = Document<Record<string, any>> & { parseTo?: string };\nexport type FormatedData = {\n title: any;\n content: string;\n parseTo?: string;\n}\nexport class DocumentFileParserService extends RootStaticInjectOptions {\n #documentParser = inject(DocumentParserConfigToken, { optional: true });\n #injector = inject(Injector);\n\n async parse(\n filePath: string,\n buffer: Uint8Array | ArrayBuffer,\n type: FileTypeResult | undefined,\n ): Promise<FormatedData[] | undefined> {\n try {\n const baseName = path.basename(filePath, path.extname(filePath));\n const blob = new Blob([buffer]);\n if (filePath.endsWith('.srt')) {\n const instance = new SRTLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (\n type?.ext === 'pptx' ||\n type?.ext === 'odt' ||\n type?.ext === 'odp' ||\n type?.ext === 'ods'\n ) {\n const instance = new PPTXLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'pdf') {\n let result;\n const useImage = this.#documentParser?.().pdfAsImage?.enable;\n if (useImage) {\n result = await pdfImageLoader(buffer, filePath, this.#injector);\n } else {\n const instance = new PDFLoader(blob);\n result = await instance.load();\n }\n return this.#formatResult(result, baseName);\n } else if (filePath.endsWith('.csv')) {\n const instance = new CSVLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'docx') {\n const instance = new DocxLoader(blob);\n const result = await instance.load();\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'xlsx') {\n const result = await xlsxLoader(buffer);\n return this.#formatResult(result, baseName);\n } else if (type?.ext === 'epub') {\n const instance = new EPubLoader(filePath);\n const result = await instance.load();\n return result\n .filter((item) => !!item.pageContent)\n .map((item, i) => ({\n title: item.metadata['chapter'] || `[未命名]${i + 1}`,\n content: item.pageContent,\n }));\n } else {\n return;\n }\n } catch (error) {\n throw new Error(\n `文件:[${filePath}]解析失败;类型[${JSON.stringify(type) ?? ''}]`,\n {\n cause: error,\n },\n );\n }\n }\n\n #formatResult(list: ParsedData[], title: string) {\n if (list.length === 1) {\n return [\n {\n title: list[0].metadata?.['title'] ?? title,\n content: list[0].pageContent.trim(),\n parseTo: list[0].parseTo,\n },\n ];\n }\n return list\n .map((item) => ({ ...item, pageContent: item.pageContent?.trim() }))\n .filter((item) => !!item.pageContent)\n .map((item, index) => ({\n title: item.metadata?.['title'] ?? `${title}-${index}`,\n content: item.pageContent,\n parseTo: item.parseTo,\n }));\n }\n}\n", "export async function xlsxLoader(buffer: Uint8Array | ArrayBuffer) {\n const { read, utils } = await import('xlsx');\n const workbook = read(buffer);\n return workbook.SheetNames.map((name) => {\n const worksheet = workbook.Sheets[name];\n const csv = utils.sheet_to_csv(worksheet);\n return { pageContent: csv, metadata: { title: name } };\n });\n}\n", "import { Injector } from 'static-injector';\nimport { DocumentParserConfigToken, ImageParserToken } from '../const';\nimport { path } from '@cyia/vfs2';\nexport async function pdfImageLoader(\n buffer: Uint8Array | ArrayBuffer,\n filePath: string,\n injector: Injector,\n) {\n let canvas = await import('@napi-rs/canvas');\n if (!(globalThis as any).DOMMatrix) {\n (globalThis as any).DOMMatrix = canvas.DOMMatrix;\n }\n if (!(globalThis as any).ImageData) {\n (globalThis as any).ImageData = canvas.ImageData;\n }\n if (!(globalThis as any).Path2D) {\n (globalThis as any).Path2D = canvas.Path2D;\n }\n const { getDocument } = await import('pdfjs-dist');\n let pdf = await getDocument(new Uint8Array(buffer)).promise;\n let list = [];\n let metadata = await pdf.getMetadata();\n const fileName = path.basename(filePath, path.extname(filePath));\n let title = (metadata.info as any)['Title'] || fileName;\n let imageParser = injector.get(ImageParserToken);\n let documentParserConfig = injector.get(DocumentParserConfigToken);\n for (let i = 1; i <= pdf.numPages; i++) {\n const page = await pdf.getPage(i);\n const viewport = page.getViewport({\n scale: documentParserConfig().pdfAsImage?.viewPortOptions?.scale ?? 1,\n });\n const canvasEl = canvas.createCanvas(viewport.width, viewport.height);\n const ctx = canvasEl.getContext('2d');\n await page.render({\n canvasContext: ctx! as any,\n viewport,\n canvas: canvasEl as any,\n }).promise;\n const image = canvasEl.toBuffer('image/png');\n let result = await imageParser(filePath, `${fileName}-${i}`, image);\n // 图片接入工作流\n list.push({\n pageContent: result.content,\n metadata: { title: title },\n parseTo: result.parseTo,\n });\n }\n\n return list;\n}\n", "import { AbstractDictParse } from '../type';\nimport * as fs from 'fs/promises';\nimport * as zlib from 'zlib';\nimport * as util from 'util';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport decompress from 'decompress';\n\nimport decompressTarbz from '@xhmikosr/decompress-tarbz2';\nimport { tmpdir } from 'os';\nimport { v4 } from 'uuid';\nimport { path } from '@cyia/vfs2';\nimport { decode } from 'html-entities';\n\nconst decoder = new util.TextDecoder('utf-8');\nexport class StardictParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(filePathList: string) {\n const filePath = filePathList;\n let infoFileContent!: string;\n let indexFileBuffer!: Buffer;\n let dictFileBuffer!: Buffer;\n if (filePath.endsWith('tar.bz2')) {\n const dir = path.join(tmpdir(), v4());\n const list = await decompress(filePath, dir, {\n plugins: [decompressTarbz()],\n });\n for (const item of list) {\n if (item.path.endsWith('ifo')) {\n infoFileContent = item.data.toString();\n } else if (item.path.endsWith('idx')) {\n indexFileBuffer = item.data;\n } else if (item.path.endsWith('dict.dz')) {\n dictFileBuffer = item.data;\n }\n }\n if (!infoFileContent || !indexFileBuffer || !dictFileBuffer) {\n throw new Error(`解压后未找到指定内容,文件夹:${dir}`);\n }\n } else {\n const fileName = path.basename(filePath).replace(/\\.(ifo|tar\\.bz)$/, '');\n\n infoFileContent = (\n await fs.readFile(\n path.resolve(path.dirname(filePath), `${fileName}.ifo`),\n )\n ).toString();\n indexFileBuffer = await fs.readFile(\n path.resolve(path.dirname(filePath), `${fileName}.idx`),\n );\n dictFileBuffer = await fs.readFile(\n path.resolve(path.dirname(filePath), `${fileName}.dict.dz`),\n );\n }\n\n // let infoPath=fs.readFile()\n // 读取导入文件/文件夹\n //解析索引\n // 解压缩内容\n // 读取内容,并且进行一些格式化\n // 将列表返回(统一插入到数据库)\n const [info, list] = await Promise.all([\n this.#getInfo(infoFileContent).then((obj) => {\n obj['name'] ??= obj['bookname'];\n return obj;\n }),\n this.#getIndex(indexFileBuffer),\n ]);\n return {\n info: info as any,\n dataListGenerator: () => this.#getDict(dictFileBuffer, list),\n };\n }\n\n async #getInfo(content: string) {\n const list = content\n .split(/\\r\\n|\\n\\r|\\n|\\r/)\n .filter(Boolean)\n .map((item) => item.split('=').filter(Boolean))\n .filter((list) => list.length === 2);\n return list.reduce(\n (obj, item) => {\n obj[item[0]] = item[1];\n return obj;\n },\n {} as Record<string, string>,\n );\n }\n async #getIndex(buffer: Buffer) {\n const indexData: [string, number, number][] = [];\n let index = 0;\n while (index < buffer.length) {\n const beg = index;\n index = buffer.indexOf('\\x00', beg);\n let word = buffer.toString('utf-8', beg, index);\n if (word.includes('&#')) {\n word = decode(word);\n }\n index++;\n const offset = buffer.readUInt32BE(index);\n index += 4;\n const size = buffer.readUInt32BE(index);\n index += 4;\n indexData.push([word, offset, size]);\n }\n return indexData;\n }\n async *#getDict(\n buffer: Buffer,\n indexData: readonly (readonly [string, number, number])[],\n ) {\n const rawdata = new Uint8Array(buffer);\n const buffer_1 = zlib.gunzipSync(rawdata);\n const rawdata_1 = new Uint8Array(buffer_1);\n for (const [word, offset, size] of indexData) {\n const chunk = rawdata_1.slice(offset, offset + size);\n const decoded = (decoder.decode(chunk) ?? '').trim();\n if (!decoded) {\n continue;\n }\n yield {\n word,\n content: decoded,\n };\n }\n }\n}\n", "import { AbstractDictParse } from '../type';\nimport { Mdict, PassCode } from '@cyia/mdict-reader';\nimport { existsSync } from 'fs';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport { path, createNormalizeVfs } from '@cyia/vfs2';\nexport class MdictParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(filePathList: string, options?: PassCode) {\n let passCode: PassCode | undefined;\n if (options?.regCode && options.userId) {\n passCode = options;\n }\n const filePath = filePathList;\n const fileName = path.basename(filePath, path.extname(filePath));\n const mdxFilePath = path.resolve(path.dirname(filePath), `${fileName}.mdx`);\n const mdxInstance = await Mdict.build(mdxFilePath, passCode);\n const mddFilePath = path.resolve(path.dirname(filePath), `${fileName}.mdd`);\n const dataInfo = mdxInstance.getDictInfo().mdx;\n return {\n // todo 其实还可以保存更多信息\n info: {\n name:\n dataInfo.Title === 'Title (No HTML code allowed)'\n ? fileName\n : dataInfo.Title,\n fileName: fileName,\n },\n dataListGenerator: () => this.wordListGenerator(mdxInstance),\n afterSave: async (assetFolder: string) => {\n // value$$.next({ message: `准备写入资源文件` });\n\n let pendList: Promise<{ name: string; message: string } | undefined>[] =\n [];\n let sum = 0;\n const waitingWrite = async () => {\n const result = (await Promise.all(pendList)).filter(Boolean);\n if (result.length) {\n throw new Error(\n result\n .map((item) => `文件[${item!.name}]写入失败,${item?.message}`)\n .join('\\n'),\n );\n } else {\n sum += pendList.length;\n // value$$.next({ message: `已写入${sum}个文件` });\n }\n pendList = [];\n };\n if (existsSync(mddFilePath)) {\n const vfs = createNormalizeVfs({ dir: assetFolder });\n const mddInstance = await Mdict.build(mddFilePath, passCode);\n const wordGenerator = mddInstance.load();\n for await (const item of wordGenerator) {\n pendList.push(\n mddInstance.getMddAsset(item).then((buffer) =>\n vfs\n .writeFile(path.join(assetFolder, item.word), buffer)\n .then(() => undefined)\n .catch((reason: any) => ({\n name: item.word,\n message: reason,\n })),\n ),\n );\n\n if (pendList.length >= 20) {\n await waitingWrite();\n }\n }\n if (pendList.length) {\n await waitingWrite();\n }\n // value$$.next({ message: '资源写入完成' });\n }\n },\n };\n }\n\n async *wordListGenerator(mdxInstance: Mdict) {\n const list = mdxInstance.load();\n for await (const item of list) {\n const definition = ((await mdxInstance.getDefinition(item)) || '').trim();\n if (!definition) {\n continue;\n }\n yield {\n content: definition,\n word: item.word,\n };\n }\n }\n}\n", "import { StardictParseService } from './dict-format/stardict-parse.service';\n\nimport { DictInput, DictParseResult } from './type';\nimport { MdictParseService } from './dict-format/mdict-parse.service';\n\nimport { inject, Injector, RootStaticInjectOptions } from 'static-injector';\n\nimport fs from 'fs';\n\nimport { path } from '@cyia/vfs2';\nimport { YamlDictParseService } from './dict-format/yaml-parse.service';\nimport { LRUCache } from 'lru-cache';\nimport { DslParseService } from './dict-format/dsl/dsl-parse.service';\n\nexport class DictService extends RootStaticInjectOptions {\n #injector = inject(Injector);\n #cache = new LRUCache<string, DictParseResult>({\n max: 2,\n ttl: 120_000,\n });\n async getDictName(input: DictInput) {\n const dict = await this.#getDictResolve(input);\n return dict.info.name || dict.info.fileName!;\n }\n async #getDictResolve(input: DictInput) {\n if (this.#cache.has(input.filePath)) {\n return this.#cache.get(input.filePath)!;\n }\n const { filePath, type } = input;\n let result!: DictParseResult;\n if (type === 'stardict') {\n result = await this.#injector.get(StardictParseService).parse(filePath);\n } else if (type === 'mdict') {\n result = await this.#injector\n .get(MdictParseService)\n .parse(filePath, input as any);\n } else if (type === 'dsl') {\n result = await this.#injector.get(DslParseService).parse(filePath);\n } else if (type === 'yaml') {\n result = await this.#injector.get(YamlDictParseService).parse(filePath);\n } else {\n throw new Error(`没有找到${type}字典对应解析器`);\n }\n this.#cache.set(input.filePath, result);\n return result;\n }\n /**\n * 第一个需要修改为3个操作\n */\n // 这里顺序反了,应该调用base,让base分配\n async importDict(name: string, dir: string, input: DictInput) {\n const baseName = path.basename(\n input!.filePath,\n path.extname(input!.filePath),\n );\n /** 知识库保存的名字 */\n const result = await this.#getDictResolve(input);\n\n name ||= result.info.name || baseName;\n\n /** 知识库文件夹 */\n\n const assetFolder = path.join(dir, 'assets');\n // 先导入资源,然后再ocr\n if (result.afterSave) {\n await fs.promises.mkdir(assetFolder, {\n recursive: true,\n });\n // 如果要保存,那么文件名应该一定存在,否则就没法写入了\n await result.afterSave(assetFolder!);\n }\n // 准备导入\n return result.dataListGenerator();\n }\n}\n", "import { path } from '@cyia/vfs2';\nimport { AbstractDictParse } from '../type';\nimport * as fs from 'fs/promises';\nimport { parse } from 'yaml';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport * as v from 'valibot';\nexport const YamlDefine = v.object({\n list: v.array(\n v.object({\n word: v.string(),\n content: v.string(),\n extra: v.optional(v.record(v.string(), v.any())),\n }),\n ),\n});\nexport class YamlDictParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(filePathList: string) {\n const filePath = filePathList;\n const ext = path.extname(filePath);\n const content = await fs.readFile(filePath, { encoding: 'utf-8' });\n const data = v.parse(YamlDefine, parse(content));\n return {\n info: {\n fileName: path.basename(filePath),\n name: path.basename(filePath, ext),\n },\n dataListGenerator: async function* () {\n for (const item of data.list) {\n yield item;\n }\n },\n };\n }\n}\n", "import fs from 'fs';\nimport chardet from 'chardet';\nimport { dslFormat } from './dsl.format';\nimport { RootStaticInjectOptions } from 'static-injector';\nimport { AbstractDictParse, DictParseResult } from '../../type';\nimport { PassCode } from '@cyia/mdict-reader';\nimport { basename } from 'path';\nexport class DslParseService\n extends RootStaticInjectOptions\n implements AbstractDictParse\n{\n async parse(\n filePath: string,\n options?: Partial<PassCode>,\n ): Promise<DictParseResult> {\n const instance = new DslParse(filePath);\n await instance.init();\n return {\n info: {\n ...instance.info,\n name: instance.info['NAME'],\n fileName: basename(filePath, '.dsl'),\n },\n dataListGenerator: () => instance.generate(),\n };\n }\n}\nclass DslParse {\n filePath;\n fileContent!: string;\n start = 0;\n\n info = {} as Record<string, any>;\n wordStart!: number;\n constructor(filePath: string) {\n this.filePath = filePath;\n }\n\n async init() {\n const buffer = await fs.promises.readFile(this.filePath);\n const subBuf = new Uint8Array(buffer.subarray(0, 500));\n const a = chardet.detect(subBuf);\n this.fileContent = buffer.toString((a as any) ?? 'UTF-16LE').trimStart();\n while (this.readMetadata()) {}\n this.readEntry();\n }\n\n readMetadata() {\n const regexp =\n /^#(NAME|INDEX_LANGUAGE|CONTENTS_LANGUAGE|SOUND_DICTIONARY|SOURCE_CODE_PAGE)\\s+(.+)/dgm;\n regexp.lastIndex = this.start;\n const result = regexp.exec(this.fileContent.trimStart());\n if (result) {\n try {\n this.info[result[1]] = JSON.parse(result[2]);\n } catch (error) {\n this.info[result[1]] = result[2];\n }\n this.start = result.indices![0][1];\n return true;\n }\n return false;\n }\n lastWordInfo?: { word: string; range: [number, number] };\n async *generate() {\n let result;\n while ((result = this.readEntry())) {\n yield result;\n }\n }\n readEntry() {\n const regexp = /^[^\\s]+/dgm;\n regexp.lastIndex = this.start;\n const result = regexp.exec(this.fileContent);\n\n if (result) {\n this.start = result.indices![0][1];\n const lastWordInfo = this.lastWordInfo;\n this.lastWordInfo = {\n word: result[0],\n range: result.indices![0],\n };\n if (lastWordInfo) {\n const content = this.fileContent\n .slice(lastWordInfo.range[1], this.lastWordInfo!.range[0])\n .replace(/^\\s+/gm, '');\n return {\n word: lastWordInfo.word,\n content: content,\n htmlContent: dslFormat(content),\n };\n }\n } else {\n if (this.lastWordInfo) {\n const content = this.fileContent\n .slice(this.lastWordInfo.range[1])\n .replace(/^\\s+/gm, '');\n const result = {\n word: this.lastWordInfo.word,\n content: content,\n htmlContent: dslFormat(content),\n };\n this.lastWordInfo = undefined;\n return result;\n }\n }\n return undefined;\n }\n}\n", "const blockStatusList = [\n '*',\n 'm',\n 'trn',\n 'ex',\n 'com',\n 's',\n 'url',\n '!trs',\n 'p',\n \"'\",\n 'lang',\n 'ref',\n 'sub',\n 'sup',\n];\nconst inlineStatusList = ['b', 'i', 'u', 'c'];\nconst sImageList = ['bmp', 'pcx', 'dcx', 'jpg', 'tif'];\nconst sSoundList = ['wav'];\nconst sVideoList = ['avi', 'webm', 'mp4'];\nclass NodeItem {\n start!: number;\n // attr: Record<string, any> = {};\n type!: 'inline' | 'block' | 'text';\n}\nclass InlineNode extends NodeItem {\n override type = 'inline' as const;\n attr;\n tag;\n constructor(tag: string, attr: Record<string, any>) {\n super();\n this.attr = attr;\n this.tag = tag;\n }\n}\nclass BlockNode extends NodeItem {\n override type = 'block' as const;\n fn;\n tag;\n constructor(tag: string, fn: (text: string) => string) {\n super();\n this.tag = tag;\n this.fn = fn;\n }\n}\nclass TextNode extends NodeItem {\n override type = 'text' as const;\n text: string;\n style: Record<string, any> = {};\n constructor(text: string) {\n super();\n this.text = text;\n }\n renderedText!: string;\n renderText() {\n const styleAttr = Object.entries(this.style)\n .map((item) => `${item[0]}: ${item[1]}`)\n .join(';');\n const formatedText = this.text.replace(/\\\\\\[/g, '[').replace(/\\\\]/g, ']');\n if (styleAttr.trim().length) {\n this.renderedText = `<span style=\"${styleAttr}\">${formatedText}</span>`;\n } else {\n this.renderedText = `${formatedText}`;\n }\n }\n}\nclass DslFormat {\n #inlineStatus: InlineNode[] = [];\n #blockStatus: BlockNode[] = [];\n #textNodeList: TextNode[] = [];\n #input;\n #start = 0;\n constructor(input: string) {\n this.#input = input;\n }\n run() {\n if (!this.#input) {\n return this.#input;\n }\n this.#scan();\n return this.#textNodeList.map((item) => item.renderedText).join('');\n }\n #scan() {\n let startResult: RegExpExecArray | null;\n let endResult: RegExpExecArray | null;\n const startRegexp =\n /(?<!\\\\)\\[(?!\\/)(?<tagName>m(?<leftPadding>[0-9])|[^\\]\\s]+)((\\s+)(?<attr>[^\\]]+))?]/dg;\n const endRegexp = /(?<!\\\\)\\[\\/(?<tagName>[^\\]]+)]/dg;\n\n while (true) {\n startRegexp.lastIndex = this.#start;\n endRegexp.lastIndex = this.#start;\n const currentText = this.#input;\n startResult = startRegexp.exec(currentText);\n endResult = endRegexp.exec(currentText);\n\n if (\n startResult &&\n (!endResult || startResult.indices![0][0] < endResult.indices![0][0])\n ) {\n // 匹配开始\n const tagName = startResult.groups!['tagName'];\n const matchStart = startResult.indices![0][0];\n this.#createTextNode(this.#start, matchStart);\n\n this.#createStatusNode(\n tagName.startsWith('m') ? 'm' : tagName,\n matchStart,\n startResult.groups!['leftPadding'] || startResult.groups!['attr'],\n );\n this.#start = startResult.indices![0][1];\n } else {\n if (endResult) {\n const tagName = endResult.groups!['tagName'];\n const matchStart = endResult.indices![0][0];\n this.#createTextNode(this.#start, matchStart);\n\n this.#mergeTextNode(tagName, matchStart);\n this.#removeStatus(tagName);\n this.#start = endResult.indices![0][1];\n } else {\n break;\n }\n }\n }\n }\n /** 普通的文本节点创建 */\n #createTextNode(start: number, end: number) {\n if (start < end) {\n const node = new TextNode(this.#input.slice(start, end));\n node.start = start;\n node.style = this.#inlineStatus.reduce(\n (obj, item) => ({ ...obj, ...item.attr }),\n {} as Record<string, any>,\n );\n node.renderText();\n this.#textNodeList.push(node);\n\n return node;\n }\n }\n #createStatusNode(name: string, start: number, attrStr: string) {\n if (inlineStatusList.includes(name)) {\n attrStr;\n let attr = {} as Record<string, any>;\n switch (name) {\n case 'c':\n if (attrStr) {\n attr = { color: attrStr };\n }\n break;\n case 'b':\n attr = { 'font-weight': 'bolder' };\n break;\n case 'u':\n attr = { 'text-decoration': '#f00 wavy underline' };\n break;\n case 'i':\n attr = { 'font-style': 'italic' };\n break;\n }\n\n const node = new InlineNode(name, attr);\n node.start = start;\n this.#inlineStatus.unshift(node);\n } else {\n const node = new BlockNode(name, (text) => {\n switch (name) {\n case 'm': {\n return `<div style=\"padding-left: ${attrStr}em;\">${text}</div>`;\n }\n case 'url': {\n return `<a href=\"${text}\">${text}</a>`;\n }\n case 'ref': {\n return `<a href=\"entry://${text}\">${text}</a>`;\n }\n case 'sub':\n case 'sup': {\n {\n return `<${name}>${text}</${name}>`;\n }\n }\n case 's': {\n if (sImageList.some((item) => text.endsWith(item))) {\n const baseName = text.slice(0, text.lastIndexOf('.'));\n return `<picture><source srcset=\"${text}\"><source srcset=\"${baseName}.webp\"><source srcset=\"${baseName}.jpg\"><img src=\"${text}\"></picture>`;\n } else if (sSoundList.some((item) => text.endsWith(item))) {\n return `<figure><audio controls src=\"${text}\"></audio></figure>`;\n } else if (sVideoList.some((item) => text.endsWith(item))) {\n const baseName = text.slice(0, text.lastIndexOf('.'));\n return `<video controls style=\"width:100%\"><source src=\"${text}\"/><source src=\"${baseName}.mp4\"/><source src=\"${baseName}.webm\"/></video>`;\n }\n return ``;\n }\n default: {\n return `<span>${text}</span>`;\n }\n }\n });\n node.start = start;\n this.#blockStatus.unshift(node);\n }\n }\n #mergeTextNode(name: string, end: number) {\n const blockStart = this.#blockStatus.find((item) => item.tag === name);\n if (blockStart) {\n const start = blockStart.start;\n let tempIndex = -1;\n\n // let i = this.textNodeList.length - 1;\n for (let i = this.#textNodeList.length - 1; i > -1; i--) {\n const textNode = this.#textNodeList[i];\n if (textNode.start < start) {\n break;\n } else {\n tempIndex = i;\n }\n }\n if (tempIndex !== -1) {\n const delNodeList = this.#textNodeList.slice(tempIndex);\n\n const mergeText = blockStart.fn(\n delNodeList.map((item) => item.renderedText).join(''),\n );\n\n this.#textNodeList = this.#textNodeList.slice(0, tempIndex);\n const node = new TextNode(mergeText);\n node.start = start;\n node.renderedText = node.text;\n this.#textNodeList.push(node);\n }\n }\n }\n #removeStatus(name: string) {\n if (inlineStatusList.includes(name)) {\n const index = this.#inlineStatus.findIndex((item) => item.tag === name);\n this.#inlineStatus.splice(index, 1);\n } else {\n const index = this.#blockStatus.findIndex((item) => item.tag === name);\n this.#blockStatus.splice(index, 1);\n }\n }\n}\n\nexport function dslFormat(input: string) {\n const item = new DslFormat(input);\n return item.run();\n}\n", "import { lexer } from 'marked';\nimport MS from 'magic-string';\ntype Position = [number, number, number, number];\nconst regex =\n /<!--\\s*(Image|Table)\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)\\s*-->/dg;\n\n//帮我实现一个js正则表达式,能够匹配文档中`<!-- Image (142, 98, 837, 856) -->`这段注释\nexport async function vlMarkdownParser(\n content: string,\n options: {\n imageGet: (\n type: string,\n position: Position,\n ) => Promise<{ src: string; title: string }>;\n },\n) {\n let mdContent = getMdLexer(content);\n let result = mdContent.matchAll(regex);\n let ms = new MS(mdContent);\n for (const item of result) {\n // qwen3 vl\n let imageData = await options.imageGet('qwen3-vl', [\n +item[2],\n +item[3],\n +item[4],\n +item[5],\n ]);\n ms.update(\n item.index,\n item.index + item[0].length,\n ``,\n );\n }\n return ms.toString();\n}\nfunction getMdLexer(content: string) {\n const list = lexer(content);\n if (\n list.length === 1 &&\n list[0].type === 'code' &&\n (list[0].lang === 'markdown' || !list[0].lang)\n ) {\n return list[0].text as string;\n }\n return content;\n}\n"],
|
|
5
5
|
"mappings": ";AAAA,SAAS,0BAA0B;AACnC,SAAS,UAAAA,SAAQ,2BAAAC,gCAA+B;AAChD,SAAS,QAAAC,aAAY;;;ACFrB,SAAS,eAAsB;AAC/B,SAAS,aAAa,MAAa;AACjC,MAAI,SAAS;AACb,MAAI,KAAK,SAAS,MAAM;AACtB;AACA,QAAI,KAAK,SAAS,WAAW;AAC3B,gBAAU;AAAA,IACZ,WAAW,KAAK,SAAS,QAAQ;AAC/B;AAAA,IACF;AAAA,EACF;AACA,SAAO,KAAK,aAAa;AAC3B;AAEO,SAAS,YAAY,QAAoB;AAC9C,SAAO,QAAQ,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,aAAa,CAAC,IAAI,aAAa,CAAC,CAAC,EAAE,CAAC,EACvE;AACL;;;ACfO,SAAS,mBACd,QACA,UACA;AACA,QAAM,OAAO,YAAY,MAAM;AAC/B,QAAMC,WAAU,IAAI,YAAY,MAAM,EAAE,OAAO,KAAK,CAAC;AACrD,MAAI;AACF,WAAOA,SAAQ,OAAO,MAAM;AAAA,EAC9B,SAAS,OAAO;AACd,UAAM,IAAI,MAAM,QAAQ,IAAI,WAAW,UAAU,QAAQ,EAAE,EAAE;AAAA,EAC/D;AACF;;;ACZA,SAAS,sBAA8B;AAWhC,IAAM,kBAAkB,IAAI,eAA6B,YAAY;AACrE,IAAM,mBAAmB,IAAI,eAUlC,kBAAkB;AACb,IAAM,4BAA4B,IAAI,eAS3C,2BAA2B;;;AChC7B,SAAS,QAAQ,UAAU,+BAA+B;AAC1D,SAAS,kBAAkB;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,iBAAiB;AAC1B,SAAS,iBAAiB;AAC1B,SAAS,kBAAkB;AAC3B,SAAS,iBAAiB;AAE1B,SAAS,YAAY;AACrB,SAAS,QAAAC,aAAY;;;ACVrB,eAAsB,WAAW,QAAkC;AACjE,QAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,MAAM;AAC3C,QAAM,WAAW,KAAK,MAAM;AAC5B,SAAO,SAAS,WAAW,IAAI,CAAC,SAAS;AACvC,UAAM,YAAY,SAAS,OAAO,IAAI;AACtC,UAAM,MAAM,MAAM,aAAa,SAAS;AACxC,WAAO,EAAE,aAAa,KAAK,UAAU,EAAE,OAAO,KAAK,EAAE;AAAA,EACvD,CAAC;AACH;;;ACNA,SAAS,YAAY;AACrB,eAAsB,eACpB,QACA,UACA,UACA;AACA,MAAI,SAAS,MAAM,OAAO,iBAAiB;AAC3C,MAAI,CAAE,WAAmB,WAAW;AAClC,IAAC,WAAmB,YAAY,OAAO;AAAA,EACzC;AACA,MAAI,CAAE,WAAmB,WAAW;AAClC,IAAC,WAAmB,YAAY,OAAO;AAAA,EACzC;AACA,MAAI,CAAE,WAAmB,QAAQ;AAC/B,IAAC,WAAmB,SAAS,OAAO;AAAA,EACtC;AACA,QAAM,EAAE,YAAY,IAAI,MAAM,OAAO,YAAY;AACjD,MAAI,MAAM,MAAM,YAAY,IAAI,WAAW,MAAM,CAAC,EAAE;AACpD,MAAI,OAAO,CAAC;AACZ,MAAI,WAAW,MAAM,IAAI,YAAY;AACrC,QAAM,WAAW,KAAK,SAAS,UAAU,KAAK,QAAQ,QAAQ,CAAC;AAC/D,MAAI,QAAS,SAAS,KAAa,OAAO,KAAK;AAC/C,MAAI,cAAc,SAAS,IAAI,gBAAgB;AAC/C,MAAI,uBAAuB,SAAS,IAAI,yBAAyB;AACjE,WAAS,IAAI,GAAG,KAAK,IAAI,UAAU,KAAK;AACtC,UAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,UAAM,WAAW,KAAK,YAAY;AAAA,MAChC,OAAO,qBAAqB,EAAE,YAAY,iBAAiB,SAAS;AAAA,IACtE,CAAC;AACD,UAAM,WAAW,OAAO,aAAa,SAAS,OAAO,SAAS,MAAM;AACpE,UAAM,MAAM,SAAS,WAAW,IAAI;AACpC,UAAM,KAAK,OAAO;AAAA,MAChB,eAAe;AAAA,MACf;AAAA,MACA,QAAQ;AAAA,IACV,CAAC,EAAE;AACH,UAAM,QAAQ,SAAS,SAAS,WAAW;AAC3C,QAAI,SAAS,MAAM,YAAY,UAAU,GAAG,QAAQ,IAAI,CAAC,IAAI,KAAK;AAElE,SAAK,KAAK;AAAA,MACR,aAAa,OAAO;AAAA,MACpB,UAAU,EAAE,MAAa;AAAA,MACzB,SAAS,OAAO;AAAA,IAClB,CAAC;AAAA,EACH;AAEA,SAAO;AACT;;;AF7BO,IAAM,4BAAN,cAAwC,wBAAwB;AAAA,EACrE,kBAAkB,OAAO,2BAA2B,EAAE,UAAU,KAAK,CAAC;AAAA,EACtE,YAAY,OAAO,QAAQ;AAAA,EAE3B,MAAM,MACJ,UACA,QACA,MACqC;AACrC,QAAI;AACF,YAAM,WAAWC,MAAK,SAAS,UAAUA,MAAK,QAAQ,QAAQ,CAAC;AAC/D,YAAM,OAAO,IAAI,KAAK,CAAC,MAAM,CAAC;AAC9B,UAAI,SAAS,SAAS,MAAM,GAAG;AAC7B,cAAM,WAAW,IAAI,UAAU,IAAI;AACnC,cAAM,SAAS,MAAM,SAAS,KAAK;AACnC,eAAO,KAAK,cAAc,QAAQ,QAAQ;AAAA,MAC5C,WACE,MAAM,QAAQ,UACd,MAAM,QAAQ,SACd,MAAM,QAAQ,SACd,MAAM,QAAQ,OACd;AACA,cAAM,WAAW,IAAI,WAAW,IAAI;AACpC,cAAM,SAAS,MAAM,SAAS,KAAK;AACnC,eAAO,KAAK,cAAc,QAAQ,QAAQ;AAAA,MAC5C,WAAW,MAAM,QAAQ,OAAO;AAC9B,YAAI;AACJ,cAAM,WAAW,KAAK,kBAAkB,EAAE,YAAY;AACtD,YAAI,UAAU;AACZ,mBAAS,MAAM,eAAe,QAAQ,UAAU,KAAK,SAAS;AAAA,QAChE,OAAO;AACL,gBAAM,WAAW,IAAI,UAAU,IAAI;AACnC,mBAAS,MAAM,SAAS,KAAK;AAAA,QAC/B;AACA,eAAO,KAAK,cAAc,QAAQ,QAAQ;AAAA,MAC5C,WAAW,SAAS,SAAS,MAAM,GAAG;AACpC,cAAM,WAAW,IAAI,UAAU,IAAI;AACnC,cAAM,SAAS,MAAM,SAAS,KAAK;AACnC,eAAO,KAAK,cAAc,QAAQ,QAAQ;AAAA,MAC5C,WAAW,MAAM,QAAQ,QAAQ;AAC/B,cAAM,WAAW,IAAI,WAAW,IAAI;AACpC,cAAM,SAAS,MAAM,SAAS,KAAK;AACnC,eAAO,KAAK,cAAc,QAAQ,QAAQ;AAAA,MAC5C,WAAW,MAAM,QAAQ,QAAQ;AAC/B,cAAM,SAAS,MAAM,WAAW,MAAM;AACtC,eAAO,KAAK,cAAc,QAAQ,QAAQ;AAAA,MAC5C,WAAW,MAAM,QAAQ,QAAQ;AAC/B,cAAM,WAAW,IAAI,WAAW,QAAQ;AACxC,cAAM,SAAS,MAAM,SAAS,KAAK;AACnC,eAAO,OACJ,OAAO,CAAC,SAAS,CAAC,CAAC,KAAK,WAAW,EACnC,IAAI,CAAC,MAAM,OAAO;AAAA,UACjB,OAAO,KAAK,SAAS,SAAS,KAAK,QAAQ,IAAI,CAAC;AAAA,UAChD,SAAS,KAAK;AAAA,QAChB,EAAE;AAAA,MACN,OAAO;AACL;AAAA,MACF;AAAA,IACF,SAAS,OAAO;AACd,YAAM,IAAI;AAAA,QACR,OAAO,QAAQ,YAAY,KAAK,UAAU,IAAI,KAAK,EAAE;AAAA,QACrD;AAAA,UACE,OAAO;AAAA,QACT;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,cAAc,MAAoB,OAAe;AAC/C,QAAI,KAAK,WAAW,GAAG;AACrB,aAAO;AAAA,QACL;AAAA,UACE,OAAO,KAAK,CAAC,EAAE,WAAW,OAAO,KAAK;AAAA,UACtC,SAAS,KAAK,CAAC,EAAE,YAAY,KAAK;AAAA,UAClC,SAAS,KAAK,CAAC,EAAE;AAAA,QACnB;AAAA,MACF;AAAA,IACF;AACA,WAAO,KACJ,IAAI,CAAC,UAAU,EAAE,GAAG,MAAM,aAAa,KAAK,aAAa,KAAK,EAAE,EAAE,EAClE,OAAO,CAAC,SAAS,CAAC,CAAC,KAAK,WAAW,EACnC,IAAI,CAAC,MAAM,WAAW;AAAA,MACrB,OAAO,KAAK,WAAW,OAAO,KAAK,GAAG,KAAK,IAAI,KAAK;AAAA,MACpD,SAAS,KAAK;AAAA,MACd,SAAS,KAAK;AAAA,IAChB,EAAE;AAAA,EACN;AACF;;;AJjGO,IAAM,oBAAN,cACGC,yBAEV;AAAA,EACE,WAAmB;AAAA,EACnB,cAAcC,QAAO,iBAAiB,EAAE,UAAU,KAAK,CAAC,GACpD,MAAM,EACP,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ,KAAK;AAAA,IAC5CA,QAAO,yBAAyB;AAAA,EAClC;AAAA,EACA,MAAM,MAAM,UAAkB,QAAkC;AAC9D,UAAM,WAAWC,MAAK,SAAS,UAAUA,MAAK,QAAQ,QAAQ,CAAC;AAC/D,QAAI;AACJ,QAAI;AACF,aAAO,MAAM,mBAAmB,MAAM;AAAA,IACxC,SAAS,OAAO;AACd,YAAM,IAAI,MAAM,OAAO,QAAQ,WAAW;AAAA,QACxC,OAAO;AAAA,MACT,CAAC;AAAA,IACH;AACA,eAAW,QAAQ,KAAK,aAAa;AACnC,YAAM,SAAS,MAAM,KAAK,MAAM,UAAU,QAAQ,IAAI;AACtD,UAAI,QAAQ;AACV,eAAO;AAAA,MACT;AAAA,IACF;AAEA,WAAO;AAAA,MACL,EAAE,OAAO,UAAU,SAAS,mBAAU,IAAI,WAAW,MAAM,CAAC,EAAE;AAAA,IAChE;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,UAAkB,QAAkC;AAC3D,WAAO,KAAK,MAAM,UAAU,MAAM,EAAE,KAAK,CAAC,SAAS;AACjD,aAAO;AAAA,QACL,SAAS,MAAM,IAAI,CAAC,SAAS,KAAK,OAAO,EAAE,KAAK,IAAI;AAAA,QACpD,SAAS,KAAK,CAAC,EAAE;AAAA,MACnB;AAAA,IACF,CAAC;AAAA,EACH;AACF;;;AOlDA,YAAY,QAAQ;AACpB,YAAY,UAAU;AACtB,YAAY,UAAU;AACtB,SAAS,2BAAAC,gCAA+B;AACxC,OAAO,gBAAgB;AAEvB,OAAO,qBAAqB;AAC5B,SAAS,cAAc;AACvB,SAAS,UAAU;AACnB,SAAS,QAAAC,aAAY;AACrB,SAAS,cAAc;AAEvB,IAAM,UAAU,IAAS,iBAAY,OAAO;AACrC,IAAM,uBAAN,cACGD,yBAEV;AAAA,EACE,MAAM,MAAM,cAAsB;AAChC,UAAM,WAAW;AACjB,QAAI;AACJ,QAAI;AACJ,QAAI;AACJ,QAAI,SAAS,SAAS,SAAS,GAAG;AAChC,YAAM,MAAMC,MAAK,KAAK,OAAO,GAAG,GAAG,CAAC;AACpC,YAAMC,QAAO,MAAM,WAAW,UAAU,KAAK;AAAA,QAC3C,SAAS,CAAC,gBAAgB,CAAC;AAAA,MAC7B,CAAC;AACD,iBAAW,QAAQA,OAAM;AACvB,YAAI,KAAK,KAAK,SAAS,KAAK,GAAG;AAC7B,4BAAkB,KAAK,KAAK,SAAS;AAAA,QACvC,WAAW,KAAK,KAAK,SAAS,KAAK,GAAG;AACpC,4BAAkB,KAAK;AAAA,QACzB,WAAW,KAAK,KAAK,SAAS,SAAS,GAAG;AACxC,2BAAiB,KAAK;AAAA,QACxB;AAAA,MACF;AACA,UAAI,CAAC,mBAAmB,CAAC,mBAAmB,CAAC,gBAAgB;AAC3D,cAAM,IAAI,MAAM,kBAAkB,GAAG,EAAE;AAAA,MACzC;AAAA,IACF,OAAO;AACL,YAAM,WAAWD,MAAK,SAAS,QAAQ,EAAE,QAAQ,oBAAoB,EAAE;AAEvE,yBACE,MAAS;AAAA,QACPA,MAAK,QAAQA,MAAK,QAAQ,QAAQ,GAAG,GAAG,QAAQ,MAAM;AAAA,MACxD,GACA,SAAS;AACX,wBAAkB,MAAS;AAAA,QACzBA,MAAK,QAAQA,MAAK,QAAQ,QAAQ,GAAG,GAAG,QAAQ,MAAM;AAAA,MACxD;AACA,uBAAiB,MAAS;AAAA,QACxBA,MAAK,QAAQA,MAAK,QAAQ,QAAQ,GAAG,GAAG,QAAQ,UAAU;AAAA,MAC5D;AAAA,IACF;AAQA,UAAM,CAAC,MAAM,IAAI,IAAI,MAAM,QAAQ,IAAI;AAAA,MACrC,KAAK,SAAS,eAAe,EAAE,KAAK,CAAC,QAAQ;AAC3C,YAAI,MAAM,MAAM,IAAI,UAAU;AAC9B,eAAO;AAAA,MACT,CAAC;AAAA,MACD,KAAK,UAAU,eAAe;AAAA,IAChC,CAAC;AACD,WAAO;AAAA,MACL;AAAA,MACA,mBAAmB,MAAM,KAAK,SAAS,gBAAgB,IAAI;AAAA,IAC7D;AAAA,EACF;AAAA,EAEA,MAAM,SAAS,SAAiB;AAC9B,UAAM,OAAO,QACV,MAAM,iBAAiB,EACvB,OAAO,OAAO,EACd,IAAI,CAAC,SAAS,KAAK,MAAM,GAAG,EAAE,OAAO,OAAO,CAAC,EAC7C,OAAO,CAACC,UAASA,MAAK,WAAW,CAAC;AACrC,WAAO,KAAK;AAAA,MACV,CAAC,KAAK,SAAS;AACb,YAAI,KAAK,CAAC,CAAC,IAAI,KAAK,CAAC;AACrB,eAAO;AAAA,MACT;AAAA,MACA,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EACA,MAAM,UAAU,QAAgB;AAC9B,UAAM,YAAwC,CAAC;AAC/C,QAAI,QAAQ;AACZ,WAAO,QAAQ,OAAO,QAAQ;AAC5B,YAAM,MAAM;AACZ,cAAQ,OAAO,QAAQ,MAAQ,GAAG;AAClC,UAAI,OAAO,OAAO,SAAS,SAAS,KAAK,KAAK;AAC9C,UAAI,KAAK,SAAS,IAAI,GAAG;AACvB,eAAO,OAAO,IAAI;AAAA,MACpB;AACA;AACA,YAAM,SAAS,OAAO,aAAa,KAAK;AACxC,eAAS;AACT,YAAM,OAAO,OAAO,aAAa,KAAK;AACtC,eAAS;AACT,gBAAU,KAAK,CAAC,MAAM,QAAQ,IAAI,CAAC;AAAA,IACrC;AACA,WAAO;AAAA,EACT;AAAA,EACA,OAAO,SACL,QACA,WACA;AACA,UAAM,UAAU,IAAI,WAAW,MAAM;AACrC,UAAM,WAAgB,gBAAW,OAAO;AACxC,UAAM,YAAY,IAAI,WAAW,QAAQ;AACzC,eAAW,CAAC,MAAM,QAAQ,IAAI,KAAK,WAAW;AAC5C,YAAM,QAAQ,UAAU,MAAM,QAAQ,SAAS,IAAI;AACnD,YAAM,WAAW,QAAQ,OAAO,KAAK,KAAK,IAAI,KAAK;AACnD,UAAI,CAAC,SAAS;AACZ;AAAA,MACF;AACA,YAAM;AAAA,QACJ;AAAA,QACA,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;;;AC9HA,SAAS,aAAuB;AAChC,SAAS,kBAAkB;AAC3B,SAAS,2BAAAC,gCAA+B;AACxC,SAAS,QAAAC,OAAM,0BAA0B;AAClC,IAAM,oBAAN,cACGD,yBAEV;AAAA,EACE,MAAM,MAAM,cAAsB,SAAoB;AACpD,QAAI;AACJ,QAAI,SAAS,WAAW,QAAQ,QAAQ;AACtC,iBAAW;AAAA,IACb;AACA,UAAM,WAAW;AACjB,UAAM,WAAWC,MAAK,SAAS,UAAUA,MAAK,QAAQ,QAAQ,CAAC;AAC/D,UAAM,cAAcA,MAAK,QAAQA,MAAK,QAAQ,QAAQ,GAAG,GAAG,QAAQ,MAAM;AAC1E,UAAM,cAAc,MAAM,MAAM,MAAM,aAAa,QAAQ;AAC3D,UAAM,cAAcA,MAAK,QAAQA,MAAK,QAAQ,QAAQ,GAAG,GAAG,QAAQ,MAAM;AAC1E,UAAM,WAAW,YAAY,YAAY,EAAE;AAC3C,WAAO;AAAA;AAAA,MAEL,MAAM;AAAA,QACJ,MACE,SAAS,UAAU,iCACf,WACA,SAAS;AAAA,QACf;AAAA,MACF;AAAA,MACA,mBAAmB,MAAM,KAAK,kBAAkB,WAAW;AAAA,MAC3D,WAAW,OAAO,gBAAwB;AAGxC,YAAI,WACF,CAAC;AACH,YAAI,MAAM;AACV,cAAM,eAAe,YAAY;AAC/B,gBAAM,UAAU,MAAM,QAAQ,IAAI,QAAQ,GAAG,OAAO,OAAO;AAC3D,cAAI,OAAO,QAAQ;AACjB,kBAAM,IAAI;AAAA,cACR,OACG,IAAI,CAAC,SAAS,MAAM,KAAM,IAAI,SAAS,MAAM,OAAO,EAAE,EACtD,KAAK,IAAI;AAAA,YACd;AAAA,UACF,OAAO;AACL,mBAAO,SAAS;AAAA,UAElB;AACA,qBAAW,CAAC;AAAA,QACd;AACA,YAAI,WAAW,WAAW,GAAG;AAC3B,gBAAM,MAAM,mBAAmB,EAAE,KAAK,YAAY,CAAC;AACnD,gBAAM,cAAc,MAAM,MAAM,MAAM,aAAa,QAAQ;AAC3D,gBAAM,gBAAgB,YAAY,KAAK;AACvC,2BAAiB,QAAQ,eAAe;AACtC,qBAAS;AAAA,cACP,YAAY,YAAY,IAAI,EAAE;AAAA,gBAAK,CAAC,WAClC,IACG,UAAUA,MAAK,KAAK,aAAa,KAAK,IAAI,GAAG,MAAM,EACnD,KAAK,MAAM,MAAS,EACpB,MAAM,CAAC,YAAiB;AAAA,kBACvB,MAAM,KAAK;AAAA,kBACX,SAAS;AAAA,gBACX,EAAE;AAAA,cACN;AAAA,YACF;AAEA,gBAAI,SAAS,UAAU,IAAI;AACzB,oBAAM,aAAa;AAAA,YACrB;AAAA,UACF;AACA,cAAI,SAAS,QAAQ;AACnB,kBAAM,aAAa;AAAA,UACrB;AAAA,QAEF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,OAAO,kBAAkB,aAAoB;AAC3C,UAAM,OAAO,YAAY,KAAK;AAC9B,qBAAiB,QAAQ,MAAM;AAC7B,YAAM,cAAe,MAAM,YAAY,cAAc,IAAI,KAAM,IAAI,KAAK;AACxE,UAAI,CAAC,YAAY;AACf;AAAA,MACF;AACA,YAAM;AAAA,QACJ,SAAS;AAAA,QACT,MAAM,KAAK;AAAA,MACb;AAAA,IACF;AAAA,EACF;AACF;;;ACxFA,SAAS,UAAAC,SAAQ,YAAAC,WAAU,2BAAAC,gCAA+B;AAE1D,OAAOC,SAAQ;AAEf,SAAS,QAAAC,aAAY;;;ACTrB,SAAS,QAAAC,aAAY;AAErB,YAAYC,SAAQ;AACpB,SAAS,aAAa;AACtB,SAAS,2BAAAC,gCAA+B;AACxC,YAAY,OAAO;AACZ,IAAM,aAAe,SAAO;AAAA,EACjC,MAAQ;AAAA,IACJ,SAAO;AAAA,MACP,MAAQ,SAAO;AAAA,MACf,SAAW,SAAO;AAAA,MAClB,OAAS,WAAW,SAAS,SAAO,GAAK,MAAI,CAAC,CAAC;AAAA,IACjD,CAAC;AAAA,EACH;AACF,CAAC;AACM,IAAM,uBAAN,cACGA,yBAEV;AAAA,EACE,MAAM,MAAM,cAAsB;AAChC,UAAM,WAAW;AACjB,UAAM,MAAMF,MAAK,QAAQ,QAAQ;AACjC,UAAM,UAAU,MAAS,aAAS,UAAU,EAAE,UAAU,QAAQ,CAAC;AACjE,UAAM,OAAS,QAAM,YAAY,MAAM,OAAO,CAAC;AAC/C,WAAO;AAAA,MACL,MAAM;AAAA,QACJ,UAAUA,MAAK,SAAS,QAAQ;AAAA,QAChC,MAAMA,MAAK,SAAS,UAAU,GAAG;AAAA,MACnC;AAAA,MACA,mBAAmB,mBAAmB;AACpC,mBAAW,QAAQ,KAAK,MAAM;AAC5B,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;;;ADzBA,SAAS,gBAAgB;;;AEXzB,OAAOG,SAAQ;AACf,OAAO,aAAa;;;ACepB,IAAM,mBAAmB,CAAC,KAAK,KAAK,KAAK,GAAG;AAC5C,IAAM,aAAa,CAAC,OAAO,OAAO,OAAO,OAAO,KAAK;AACrD,IAAM,aAAa,CAAC,KAAK;AACzB,IAAM,aAAa,CAAC,OAAO,QAAQ,KAAK;AACxC,IAAM,WAAN,MAAe;AAAA,EACb;AAAA;AAAA,EAEA;AACF;AACA,IAAM,aAAN,cAAyB,SAAS;AAAA,EACvB,OAAO;AAAA,EAChB;AAAA,EACA;AAAA,EACA,YAAY,KAAa,MAA2B;AAClD,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,MAAM;AAAA,EACb;AACF;AACA,IAAM,YAAN,cAAwB,SAAS;AAAA,EACtB,OAAO;AAAA,EAChB;AAAA,EACA;AAAA,EACA,YAAY,KAAa,IAA8B;AACrD,UAAM;AACN,SAAK,MAAM;AACX,SAAK,KAAK;AAAA,EACZ;AACF;AACA,IAAM,WAAN,cAAuB,SAAS;AAAA,EACrB,OAAO;AAAA,EAChB;AAAA,EACA,QAA6B,CAAC;AAAA,EAC9B,YAAY,MAAc;AACxB,UAAM;AACN,SAAK,OAAO;AAAA,EACd;AAAA,EACA;AAAA,EACA,aAAa;AACX,UAAM,YAAY,OAAO,QAAQ,KAAK,KAAK,EACxC,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,EAAE,EACtC,KAAK,GAAG;AACX,UAAM,eAAe,KAAK,KAAK,QAAQ,SAAS,GAAG,EAAE,QAAQ,QAAQ,GAAG;AACxE,QAAI,UAAU,KAAK,EAAE,QAAQ;AAC3B,WAAK,eAAe,gBAAgB,SAAS,KAAK,YAAY;AAAA,IAChE,OAAO;AACL,WAAK,eAAe,GAAG,YAAY;AAAA,IACrC;AAAA,EACF;AACF;AACA,IAAM,YAAN,MAAgB;AAAA,EACd,gBAA8B,CAAC;AAAA,EAC/B,eAA4B,CAAC;AAAA,EAC7B,gBAA4B,CAAC;AAAA,EAC7B;AAAA,EACA,SAAS;AAAA,EACT,YAAY,OAAe;AACzB,SAAK,SAAS;AAAA,EAChB;AAAA,EACA,MAAM;AACJ,QAAI,CAAC,KAAK,QAAQ;AAChB,aAAO,KAAK;AAAA,IACd;AACA,SAAK,MAAM;AACX,WAAO,KAAK,cAAc,IAAI,CAAC,SAAS,KAAK,YAAY,EAAE,KAAK,EAAE;AAAA,EACpE;AAAA,EACA,QAAQ;AACN,QAAI;AACJ,QAAI;AACJ,UAAM,cACJ;AACF,UAAM,YAAY;AAElB,WAAO,MAAM;AACX,kBAAY,YAAY,KAAK;AAC7B,gBAAU,YAAY,KAAK;AAC3B,YAAM,cAAc,KAAK;AACzB,oBAAc,YAAY,KAAK,WAAW;AAC1C,kBAAY,UAAU,KAAK,WAAW;AAEtC,UACE,gBACC,CAAC,aAAa,YAAY,QAAS,CAAC,EAAE,CAAC,IAAI,UAAU,QAAS,CAAC,EAAE,CAAC,IACnE;AAEA,cAAM,UAAU,YAAY,OAAQ,SAAS;AAC7C,cAAM,aAAa,YAAY,QAAS,CAAC,EAAE,CAAC;AAC5C,aAAK,gBAAgB,KAAK,QAAQ,UAAU;AAE5C,aAAK;AAAA,UACH,QAAQ,WAAW,GAAG,IAAI,MAAM;AAAA,UAChC;AAAA,UACA,YAAY,OAAQ,aAAa,KAAK,YAAY,OAAQ,MAAM;AAAA,QAClE;AACA,aAAK,SAAS,YAAY,QAAS,CAAC,EAAE,CAAC;AAAA,MACzC,OAAO;AACL,YAAI,WAAW;AACb,gBAAM,UAAU,UAAU,OAAQ,SAAS;AAC3C,gBAAM,aAAa,UAAU,QAAS,CAAC,EAAE,CAAC;AAC1C,eAAK,gBAAgB,KAAK,QAAQ,UAAU;AAE5C,eAAK,eAAe,SAAS,UAAU;AACvC,eAAK,cAAc,OAAO;AAC1B,eAAK,SAAS,UAAU,QAAS,CAAC,EAAE,CAAC;AAAA,QACvC,OAAO;AACL;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAEA,gBAAgB,OAAe,KAAa;AAC1C,QAAI,QAAQ,KAAK;AACf,YAAM,OAAO,IAAI,SAAS,KAAK,OAAO,MAAM,OAAO,GAAG,CAAC;AACvD,WAAK,QAAQ;AACb,WAAK,QAAQ,KAAK,cAAc;AAAA,QAC9B,CAAC,KAAK,UAAU,EAAE,GAAG,KAAK,GAAG,KAAK,KAAK;AAAA,QACvC,CAAC;AAAA,MACH;AACA,WAAK,WAAW;AAChB,WAAK,cAAc,KAAK,IAAI;AAE5B,aAAO;AAAA,IACT;AAAA,EACF;AAAA,EACA,kBAAkB,MAAc,OAAe,SAAiB;AAC9D,QAAI,iBAAiB,SAAS,IAAI,GAAG;AACnC;AACA,UAAI,OAAO,CAAC;AACZ,cAAQ,MAAM;AAAA,QACZ,KAAK;AACH,cAAI,SAAS;AACX,mBAAO,EAAE,OAAO,QAAQ;AAAA,UAC1B;AACA;AAAA,QACF,KAAK;AACH,iBAAO,EAAE,eAAe,SAAS;AACjC;AAAA,QACF,KAAK;AACH,iBAAO,EAAE,mBAAmB,sBAAsB;AAClD;AAAA,QACF,KAAK;AACH,iBAAO,EAAE,cAAc,SAAS;AAChC;AAAA,MACJ;AAEA,YAAM,OAAO,IAAI,WAAW,MAAM,IAAI;AACtC,WAAK,QAAQ;AACb,WAAK,cAAc,QAAQ,IAAI;AAAA,IACjC,OAAO;AACL,YAAM,OAAO,IAAI,UAAU,MAAM,CAAC,SAAS;AACzC,gBAAQ,MAAM;AAAA,UACZ,KAAK,KAAK;AACR,mBAAO,6BAA6B,OAAO,QAAQ,IAAI;AAAA,UACzD;AAAA,UACA,KAAK,OAAO;AACV,mBAAO,YAAY,IAAI,KAAK,IAAI;AAAA,UAClC;AAAA,UACA,KAAK,OAAO;AACV,mBAAO,oBAAoB,IAAI,KAAK,IAAI;AAAA,UAC1C;AAAA,UACA,KAAK;AAAA,UACL,KAAK,OAAO;AACV;AACE,qBAAO,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;AAAA,YAClC;AAAA,UACF;AAAA,UACA,KAAK,KAAK;AACR,gBAAI,WAAW,KAAK,CAAC,SAAS,KAAK,SAAS,IAAI,CAAC,GAAG;AAClD,oBAAM,WAAW,KAAK,MAAM,GAAG,KAAK,YAAY,GAAG,CAAC;AACpD,qBAAO,4BAA4B,IAAI,qBAAqB,QAAQ,0BAA0B,QAAQ,mBAAmB,IAAI;AAAA,YAC/H,WAAW,WAAW,KAAK,CAAC,SAAS,KAAK,SAAS,IAAI,CAAC,GAAG;AACzD,qBAAO,gCAAgC,IAAI;AAAA,YAC7C,WAAW,WAAW,KAAK,CAAC,SAAS,KAAK,SAAS,IAAI,CAAC,GAAG;AACzD,oBAAM,WAAW,KAAK,MAAM,GAAG,KAAK,YAAY,GAAG,CAAC;AACpD,qBAAO,mDAAmD,IAAI,mBAAmB,QAAQ,uBAAuB,QAAQ;AAAA,YAC1H;AACA,mBAAO;AAAA,UACT;AAAA,UACA,SAAS;AACP,mBAAO,SAAS,IAAI;AAAA,UACtB;AAAA,QACF;AAAA,MACF,CAAC;AACD,WAAK,QAAQ;AACb,WAAK,aAAa,QAAQ,IAAI;AAAA,IAChC;AAAA,EACF;AAAA,EACA,eAAe,MAAc,KAAa;AACxC,UAAM,aAAa,KAAK,aAAa,KAAK,CAAC,SAAS,KAAK,QAAQ,IAAI;AACrE,QAAI,YAAY;AACd,YAAM,QAAQ,WAAW;AACzB,UAAI,YAAY;AAGhB,eAAS,IAAI,KAAK,cAAc,SAAS,GAAG,IAAI,IAAI,KAAK;AACvD,cAAM,WAAW,KAAK,cAAc,CAAC;AACrC,YAAI,SAAS,QAAQ,OAAO;AAC1B;AAAA,QACF,OAAO;AACL,sBAAY;AAAA,QACd;AAAA,MACF;AACA,UAAI,cAAc,IAAI;AACpB,cAAM,cAAc,KAAK,cAAc,MAAM,SAAS;AAEtD,cAAM,YAAY,WAAW;AAAA,UAC3B,YAAY,IAAI,CAAC,SAAS,KAAK,YAAY,EAAE,KAAK,EAAE;AAAA,QACtD;AAEA,aAAK,gBAAgB,KAAK,cAAc,MAAM,GAAG,SAAS;AAC1D,cAAM,OAAO,IAAI,SAAS,SAAS;AACnC,aAAK,QAAQ;AACb,aAAK,eAAe,KAAK;AACzB,aAAK,cAAc,KAAK,IAAI;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AAAA,EACA,cAAc,MAAc;AAC1B,QAAI,iBAAiB,SAAS,IAAI,GAAG;AACnC,YAAM,QAAQ,KAAK,cAAc,UAAU,CAAC,SAAS,KAAK,QAAQ,IAAI;AACtE,WAAK,cAAc,OAAO,OAAO,CAAC;AAAA,IACpC,OAAO;AACL,YAAM,QAAQ,KAAK,aAAa,UAAU,CAAC,SAAS,KAAK,QAAQ,IAAI;AACrE,WAAK,aAAa,OAAO,OAAO,CAAC;AAAA,IACnC;AAAA,EACF;AACF;AAEO,SAAS,UAAU,OAAe;AACvC,QAAM,OAAO,IAAI,UAAU,KAAK;AAChC,SAAO,KAAK,IAAI;AAClB;;;ADrPA,SAAS,2BAAAC,gCAA+B;AAGxC,SAAS,gBAAgB;AAClB,IAAM,kBAAN,cACGA,yBAEV;AAAA,EACE,MAAM,MACJ,UACA,SAC0B;AAC1B,UAAM,WAAW,IAAI,SAAS,QAAQ;AACtC,UAAM,SAAS,KAAK;AACpB,WAAO;AAAA,MACL,MAAM;AAAA,QACJ,GAAG,SAAS;AAAA,QACZ,MAAM,SAAS,KAAK,MAAM;AAAA,QAC1B,UAAU,SAAS,UAAU,MAAM;AAAA,MACrC;AAAA,MACA,mBAAmB,MAAM,SAAS,SAAS;AAAA,IAC7C;AAAA,EACF;AACF;AACA,IAAM,WAAN,MAAe;AAAA,EACb;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EAER,OAAO,CAAC;AAAA,EACR;AAAA,EACA,YAAY,UAAkB;AAC5B,SAAK,WAAW;AAAA,EAClB;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,SAAS,MAAMC,IAAG,SAAS,SAAS,KAAK,QAAQ;AACvD,UAAM,SAAS,IAAI,WAAW,OAAO,SAAS,GAAG,GAAG,CAAC;AACrD,UAAM,IAAI,QAAQ,OAAO,MAAM;AAC/B,SAAK,cAAc,OAAO,SAAU,KAAa,UAAU,EAAE,UAAU;AACvE,WAAO,KAAK,aAAa,GAAG;AAAA,IAAC;AAC7B,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,eAAe;AACb,UAAM,SACJ;AACF,WAAO,YAAY,KAAK;AACxB,UAAM,SAAS,OAAO,KAAK,KAAK,YAAY,UAAU,CAAC;AACvD,QAAI,QAAQ;AACV,UAAI;AACF,aAAK,KAAK,OAAO,CAAC,CAAC,IAAI,KAAK,MAAM,OAAO,CAAC,CAAC;AAAA,MAC7C,SAAS,OAAO;AACd,aAAK,KAAK,OAAO,CAAC,CAAC,IAAI,OAAO,CAAC;AAAA,MACjC;AACA,WAAK,QAAQ,OAAO,QAAS,CAAC,EAAE,CAAC;AACjC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT;AAAA,EACA;AAAA,EACA,OAAO,WAAW;AAChB,QAAI;AACJ,WAAQ,SAAS,KAAK,UAAU,GAAI;AAClC,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,YAAY;AACV,UAAM,SAAS;AACf,WAAO,YAAY,KAAK;AACxB,UAAM,SAAS,OAAO,KAAK,KAAK,WAAW;AAE3C,QAAI,QAAQ;AACV,WAAK,QAAQ,OAAO,QAAS,CAAC,EAAE,CAAC;AACjC,YAAM,eAAe,KAAK;AAC1B,WAAK,eAAe;AAAA,QAClB,MAAM,OAAO,CAAC;AAAA,QACd,OAAO,OAAO,QAAS,CAAC;AAAA,MAC1B;AACA,UAAI,cAAc;AAChB,cAAM,UAAU,KAAK,YAClB,MAAM,aAAa,MAAM,CAAC,GAAG,KAAK,aAAc,MAAM,CAAC,CAAC,EACxD,QAAQ,UAAU,EAAE;AACvB,eAAO;AAAA,UACL,MAAM,aAAa;AAAA,UACnB;AAAA,UACA,aAAa,UAAU,OAAO;AAAA,QAChC;AAAA,MACF;AAAA,IACF,OAAO;AACL,UAAI,KAAK,cAAc;AACrB,cAAM,UAAU,KAAK,YAClB,MAAM,KAAK,aAAa,MAAM,CAAC,CAAC,EAChC,QAAQ,UAAU,EAAE;AACvB,cAAMC,UAAS;AAAA,UACb,MAAM,KAAK,aAAa;AAAA,UACxB;AAAA,UACA,aAAa,UAAU,OAAO;AAAA,QAChC;AACA,aAAK,eAAe;AACpB,eAAOA;AAAA,MACT;AAAA,IACF;AACA,WAAO;AAAA,EACT;AACF;;;AF9FO,IAAM,cAAN,cAA0BC,yBAAwB;AAAA,EACvD,YAAYC,QAAOC,SAAQ;AAAA,EAC3B,SAAS,IAAI,SAAkC;AAAA,IAC7C,KAAK;AAAA,IACL,KAAK;AAAA,EACP,CAAC;AAAA,EACD,MAAM,YAAY,OAAkB;AAClC,UAAM,OAAO,MAAM,KAAK,gBAAgB,KAAK;AAC7C,WAAO,KAAK,KAAK,QAAQ,KAAK,KAAK;AAAA,EACrC;AAAA,EACA,MAAM,gBAAgB,OAAkB;AACtC,QAAI,KAAK,OAAO,IAAI,MAAM,QAAQ,GAAG;AACnC,aAAO,KAAK,OAAO,IAAI,MAAM,QAAQ;AAAA,IACvC;AACA,UAAM,EAAE,UAAU,KAAK,IAAI;AAC3B,QAAI;AACJ,QAAI,SAAS,YAAY;AACvB,eAAS,MAAM,KAAK,UAAU,IAAI,oBAAoB,EAAE,MAAM,QAAQ;AAAA,IACxE,WAAW,SAAS,SAAS;AAC3B,eAAS,MAAM,KAAK,UACjB,IAAI,iBAAiB,EACrB,MAAM,UAAU,KAAY;AAAA,IACjC,WAAW,SAAS,OAAO;AACzB,eAAS,MAAM,KAAK,UAAU,IAAI,eAAe,EAAE,MAAM,QAAQ;AAAA,IACnE,WAAW,SAAS,QAAQ;AAC1B,eAAS,MAAM,KAAK,UAAU,IAAI,oBAAoB,EAAE,MAAM,QAAQ;AAAA,IACxE,OAAO;AACL,YAAM,IAAI,MAAM,OAAO,IAAI,SAAS;AAAA,IACtC;AACA,SAAK,OAAO,IAAI,MAAM,UAAU,MAAM;AACtC,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,WAAW,MAAc,KAAa,OAAkB;AAC5D,UAAM,WAAWC,MAAK;AAAA,MACpB,MAAO;AAAA,MACPA,MAAK,QAAQ,MAAO,QAAQ;AAAA,IAC9B;AAEA,UAAM,SAAS,MAAM,KAAK,gBAAgB,KAAK;AAE/C,aAAS,OAAO,KAAK,QAAQ;AAI7B,UAAM,cAAcA,MAAK,KAAK,KAAK,QAAQ;AAE3C,QAAI,OAAO,WAAW;AACpB,YAAMC,IAAG,SAAS,MAAM,aAAa;AAAA,QACnC,WAAW;AAAA,MACb,CAAC;AAED,YAAM,OAAO,UAAU,WAAY;AAAA,IACrC;AAEA,WAAO,OAAO,kBAAkB;AAAA,EAClC;AACF;;;AI1EA,SAAS,aAAa;AACtB,OAAO,QAAQ;AAEf,IAAM,QACJ;AAGF,eAAsB,iBACpB,SACA,SAMA;AACA,MAAI,YAAY,WAAW,OAAO;AAClC,MAAI,SAAS,UAAU,SAAS,KAAK;AACrC,MAAI,KAAK,IAAI,GAAG,SAAS;AACzB,aAAW,QAAQ,QAAQ;AAEzB,QAAI,YAAY,MAAM,QAAQ,SAAS,YAAY;AAAA,MACjD,CAAC,KAAK,CAAC;AAAA,MACP,CAAC,KAAK,CAAC;AAAA,MACP,CAAC,KAAK,CAAC;AAAA,MACP,CAAC,KAAK,CAAC;AAAA,IACT,CAAC;AACD,OAAG;AAAA,MACD,KAAK;AAAA,MACL,KAAK,QAAQ,KAAK,CAAC,EAAE;AAAA,MACrB,KAAK,UAAU,KAAK,KAAK,UAAU,GAAG;AAAA,IACxC;AAAA,EACF;AACA,SAAO,GAAG,SAAS;AACrB;AACA,SAAS,WAAW,SAAiB;AACnC,QAAM,OAAO,MAAM,OAAO;AAC1B,MACE,KAAK,WAAW,KAChB,KAAK,CAAC,EAAE,SAAS,WAChB,KAAK,CAAC,EAAE,SAAS,cAAc,CAAC,KAAK,CAAC,EAAE,OACzC;AACA,WAAO,KAAK,CAAC,EAAE;AAAA,EACjB;AACA,SAAO;AACT;",
|
|
6
6
|
"names": ["inject", "RootStaticInjectOptions", "path", "decoder", "path", "path", "RootStaticInjectOptions", "inject", "path", "RootStaticInjectOptions", "path", "list", "RootStaticInjectOptions", "path", "inject", "Injector", "RootStaticInjectOptions", "fs", "path", "path", "fs", "RootStaticInjectOptions", "fs", "RootStaticInjectOptions", "fs", "result", "RootStaticInjectOptions", "inject", "Injector", "path", "fs"]
|
|
7
7
|
}
|
package/image.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../packages/image/convert.ts", "../packages/image/extract.ts", "../packages/image/image-metadata.ts"],
|
|
4
|
-
"sourcesContent": ["import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\
|
|
4
|
+
"sourcesContent": ["import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\nimport { getImageMetadata } from './image-metadata';\nexport async function imageExtract(\n buffer: Buffer,\n position: sharp.Region,\n padding: number = 0,\n) {\n let metadata =await getImageMetadata(buffer);\n let left = Math.min(\n Math.max(Math.round(position.left - padding), 0),\n metadata.width,\n );\n let top = Math.min(\n Math.max(Math.round(position.top - padding), 0),\n metadata.height,\n );\n return sharp(buffer)\n .extract({\n left,\n top,\n width: Math.min(\n Math.max(Math.round(position.width + padding * 2), 0),\n metadata.width - left,\n ),\n height: Math.min(\n Math.max(Math.round(position.height + padding * 2), 0),\n metadata.height - top,\n ),\n })\n .toBuffer();\n}\n", "import sharp from 'sharp';\n\nexport function getImageMetadata(buffer: Buffer) {\n let instance = sharp(buffer);\n return instance.metadata();\n}\n"],
|
|
5
5
|
"mappings": ";AAAA,YAAY,OAAO;AACnB,YAAY,QAAQ;AACpB,SAAS,YAAY;AACrB,SAAS,0BAA0B;AACnC,OAAO,WAAW;AAClB,OAAO,gBAAgB;AACvB,SAAS,cAAc;AAEvB,IAAM,0BAA0B;AAEhC,IAAM,cAAgB,QAAM;AAAA,EACxB;AAAA,IACE,SAAO;AAAA,IACP,QAAM,CAAC,UAAU,wBAAwB,KAAK,KAAK,CAAC;AAAA,IACpD,YAAU,CAAC,WAAW;AACtB,YAAM,SAAS,OAAO,MAAM,uBAAuB;AACnD,aAAO,IAAI;AAAA,QACT,OAAO,KAAK,OAAO,MAAM,OAAO,CAAC,EAAE,MAAM,GAAG,QAAQ;AAAA,MACtD;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACE;AAAA,IACE,SAAO;AAAA,IACP;AAAA,MAAU,CAAC,aAER,YAAS,KAAK,UAAU,QAAQ,CAAC,EACjC,KAAK,CAAC,WAAW,IAAI,WAAW,MAAM,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA,EACE,OAAO,SAAmB,CAAC,UAAU,iBAAiB,UAAU,CAAC;AACrE,CAAC;AACD,eAAsB,eAAe,OAA4B;AAC/D,QAAM,SAAS,MAAQ,QAAM,aAAa,KAAK;AAC/C,SAAO;AACT;AAKA,eAAsB,aAAa,OAA4B;AAC7D,QAAM,SAAS,MAAM,eAAe,KAAK;AACzC,QAAM,OAAO,MAAM,mBAAmB,MAAM;AAC5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,UAAU;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,aAAa;AAC7B,UAAM,OAAO,OAAO,OAAO,KAAK,MAAM,CAAC;AACvC,UAAM,iBAAiB,KAAK;AAE5B,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;AACjD,YAAM,QAAQ,eAAe,CAAC;AAC9B,YAAM,OAAO,eAAe,IAAI,CAAC;AACjC,YAAM,QAAQ,eAAe,IAAI,CAAC;AAClC,YAAM,MAAM,eAAe,IAAI,CAAC;AAChC,qBAAe,CAAC,IAAI;AACpB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAK,KAAa,gBAAgB,QAAQ;AAAA,IAChE;AACA,UAAM,SAAS,MAAM,gBAAgB;AAAA,MACnC,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC,EAAE,YAAY,CAAC;AAChB,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,WAAW,MAAM,SAAS,gBAAgB,MAAM,SAAS,cAAc;AACrE,UAAM,OAAO,MAAM,WAAW;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,UAAM,SAAS,MAAM,KAAK,MAAM;AAAA,MAC9B,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC;AACD,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,OAAO;AACL,UAAM,SAAS,MAAM,MAAM;AAC3B,WAAO,EAAE,MAAM,KAAK,MAAM,KAAK,OAAO;AAAA,EACxC;AACF;AAKA,eAAsB,0BAA0B,OAA4B;AAC1E,QAAM,UAAU,MAAM,aAAa,KAAK;AAExC,SAAO;AAAA,IACL,MAAM,QAAQ;AAAA,IACd,QAAQ,IAAI,WAAW,MAAM,QAAQ,IAAI,IAAI,EAAE,SAAS,CAAC;AAAA,EAC3D;AACF;AAEO,SAAS,oBAAoB,OAGjC;AACD,SAAO,QAAQ,MAAM,IAAI,WAAW,OAAO,KAAK,MAAM,MAAM,EAAE,SAAS,QAAQ,CAAC;AAClF;AACO,SAAS,mBAAmB,OAGhC;AACD,SAAO,OAAO,KAAK,MAAM,MAAM,EAAE,SAAS,QAAQ;AACpD;;;AC5GA,OAAOA,YAAW;;;ACAlB,OAAOC,YAAW;AAEX,SAAS,iBAAiB,QAAgB;AAC/C,MAAI,WAAWA,OAAM,MAAM;AAC3B,SAAO,SAAS,SAAS;AAC3B;;;ADHA,eAAsB,aACpB,QACA,UACA,UAAkB,GAClB;AACA,MAAI,WAAU,MAAM,iBAAiB,MAAM;AAC3C,MAAI,OAAO,KAAK;AAAA,IACd,KAAK,IAAI,KAAK,MAAM,SAAS,OAAO,OAAO,GAAG,CAAC;AAAA,IAC/C,SAAS;AAAA,EACX;AACA,MAAI,MAAM,KAAK;AAAA,IACb,KAAK,IAAI,KAAK,MAAM,SAAS,MAAM,OAAO,GAAG,CAAC;AAAA,IAC9C,SAAS;AAAA,EACX;AACA,SAAOC,OAAM,MAAM,EAChB,QAAQ;AAAA,IACP;AAAA,IACA;AAAA,IACA,OAAO,KAAK;AAAA,MACV,KAAK,IAAI,KAAK,MAAM,SAAS,QAAQ,UAAU,CAAC,GAAG,CAAC;AAAA,MACpD,SAAS,QAAQ;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,MACX,KAAK,IAAI,KAAK,MAAM,SAAS,SAAS,UAAU,CAAC,GAAG,CAAC;AAAA,MACrD,SAAS,SAAS;AAAA,IACpB;AAAA,EACF,CAAC,EACA,SAAS;AACd;",
|
|
6
6
|
"names": ["sharp", "sharp", "sharp"]
|
|
7
7
|
}
|
package/ocr.mjs
CHANGED
|
@@ -242,106 +242,106 @@ var ModelConfig = [
|
|
|
242
242
|
{
|
|
243
243
|
label: "简体中文",
|
|
244
244
|
key: "ch_mobile",
|
|
245
|
-
det: "det/ch_PP-
|
|
246
|
-
rec: "rec/ch_PP-
|
|
247
|
-
dict: "rec/ch_PP-
|
|
248
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
245
|
+
det: "det/ch_PP-OCRv4_det_mobile.onnx",
|
|
246
|
+
rec: "rec/ch_PP-OCRv4_rec_mobile.onnx",
|
|
247
|
+
dict: "rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt",
|
|
248
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
249
249
|
},
|
|
250
250
|
{
|
|
251
251
|
label: "简体中文(服务器)",
|
|
252
252
|
key: "ch_server",
|
|
253
|
-
det: "det/ch_PP-
|
|
254
|
-
rec: "rec/ch_PP-
|
|
255
|
-
dict: "rec/ch_PP-
|
|
256
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
253
|
+
det: "det/ch_PP-OCRv4_det_server.onnx",
|
|
254
|
+
rec: "rec/ch_PP-OCRv4_rec_server.onnx",
|
|
255
|
+
dict: "rec/ch_PP-OCRv4_rec_server/ppocr_keys_v1.txt",
|
|
256
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
257
257
|
},
|
|
258
258
|
{
|
|
259
259
|
label: "繁體中文",
|
|
260
260
|
key: "chinese_cht",
|
|
261
|
-
det: "det/ch_PP-
|
|
262
|
-
rec: "rec/chinese_cht_PP-
|
|
263
|
-
dict: "rec/chinese_cht_PP-
|
|
264
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
261
|
+
det: "det/ch_PP-OCRv4_det_server.onnx",
|
|
262
|
+
rec: "rec/chinese_cht_PP-OCRv3_rec_mobile.onnx",
|
|
263
|
+
dict: "rec/chinese_cht_PP-OCRv3_rec_mobile/chinese_cht_dict.txt",
|
|
264
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
265
265
|
},
|
|
266
266
|
{
|
|
267
267
|
label: "英文",
|
|
268
268
|
key: "en_mobile",
|
|
269
|
-
det: "det/en_PP-
|
|
270
|
-
rec: "rec/en_PP-
|
|
271
|
-
dict: "rec/en_PP-
|
|
272
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
269
|
+
det: "det/en_PP-OCRv3_det_mobile.onnx",
|
|
270
|
+
rec: "rec/en_PP-OCRv4_rec_mobile.onnx",
|
|
271
|
+
dict: "rec/en_PP-OCRv4_rec_mobile/en_dict.txt",
|
|
272
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
273
273
|
},
|
|
274
274
|
{
|
|
275
275
|
label: "阿拉伯文",
|
|
276
276
|
key: "ar_mobile",
|
|
277
|
-
det: "det/
|
|
278
|
-
rec: "rec/arabic_PP-
|
|
279
|
-
dict: "rec/arabic_PP-
|
|
280
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
277
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
278
|
+
rec: "rec/arabic_PP-OCRv4_rec_mobile.onnx",
|
|
279
|
+
dict: "rec/arabic_PP-OCRv4_rec_mobile/arabic_dict.txt",
|
|
280
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
281
281
|
},
|
|
282
282
|
{
|
|
283
283
|
label: "塞尔维亚文",
|
|
284
284
|
key: "cyrillic_mobile",
|
|
285
|
-
det: "det/
|
|
286
|
-
rec: "rec/cyrillic_PP-
|
|
287
|
-
dict: "rec/cyrillic_PP-
|
|
288
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
285
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
286
|
+
rec: "rec/cyrillic_PP-OCRv3_rec_mobile.onnx",
|
|
287
|
+
dict: "rec/cyrillic_PP-OCRv3_rec_mobile/cyrillic_dict.txt",
|
|
288
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
289
289
|
},
|
|
290
290
|
{
|
|
291
291
|
label: "梵文",
|
|
292
292
|
key: "devanagari_mobile",
|
|
293
|
-
det: "det/
|
|
294
|
-
rec: "rec/devanagari_PP-
|
|
295
|
-
dict: "rec/devanagari_PP-
|
|
296
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
293
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
294
|
+
rec: "rec/devanagari_PP-OCRv4_rec_mobile.onnx",
|
|
295
|
+
dict: "rec/devanagari_PP-OCRv4_rec_mobile/devanagari_dict.txt",
|
|
296
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
297
297
|
},
|
|
298
298
|
{
|
|
299
299
|
label: "日文",
|
|
300
300
|
key: "japan_mobile",
|
|
301
|
-
det: "det/
|
|
302
|
-
rec: "rec/japan_PP-
|
|
303
|
-
dict: "rec/japan_PP-
|
|
304
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
301
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
302
|
+
rec: "rec/japan_PP-OCRv4_rec_mobile.onnx",
|
|
303
|
+
dict: "rec/japan_PP-OCRv4_rec_mobile/japan_dict.txt",
|
|
304
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
305
305
|
},
|
|
306
306
|
{
|
|
307
307
|
label: "卡纳达语",
|
|
308
308
|
key: "ka_mobile",
|
|
309
|
-
det: "det/
|
|
310
|
-
rec: "rec/ka_PP-
|
|
311
|
-
dict: "rec/ka_PP-
|
|
312
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
309
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
310
|
+
rec: "rec/ka_PP-OCRv4_rec_mobile.onnx",
|
|
311
|
+
dict: "rec/ka_PP-OCRv4_rec_mobile/ka_dict.txt",
|
|
312
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
313
313
|
},
|
|
314
314
|
{
|
|
315
315
|
label: "韩文",
|
|
316
316
|
key: "korean_mobile",
|
|
317
|
-
det: "det/
|
|
318
|
-
rec: "rec/korean_PP-
|
|
319
|
-
dict: "rec/korean_PP-
|
|
320
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
317
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
318
|
+
rec: "rec/korean_PP-OCRv4_rec_mobile.onnx",
|
|
319
|
+
dict: "rec/korean_PP-OCRv4_rec_mobile/korean_dict.txt",
|
|
320
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
321
321
|
},
|
|
322
322
|
{
|
|
323
323
|
label: "拉丁文",
|
|
324
324
|
key: "latin_mobile",
|
|
325
|
-
det: "det/
|
|
326
|
-
rec: "rec/latin_PP-
|
|
327
|
-
dict: "rec/latin_PP-
|
|
328
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
325
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
326
|
+
rec: "rec/latin_PP-OCRv3_rec_mobile.onnx",
|
|
327
|
+
dict: "rec/latin_PP-OCRv3_rec_mobile/latin_dict.txt",
|
|
328
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
329
329
|
},
|
|
330
330
|
{
|
|
331
331
|
label: "泰米尔文",
|
|
332
332
|
key: "ta_mobile",
|
|
333
|
-
det: "det/
|
|
334
|
-
rec: "rec/ta_PP-
|
|
335
|
-
dict: "rec/ta_PP-
|
|
336
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
333
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
334
|
+
rec: "rec/ta_PP-OCRv4_rec_mobile.onnx",
|
|
335
|
+
dict: "rec/ta_PP-OCRv4_rec_mobile/ta_dict.txt",
|
|
336
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
337
337
|
},
|
|
338
338
|
{
|
|
339
339
|
label: "泰卢固文",
|
|
340
340
|
key: "te_mobile",
|
|
341
|
-
det: "det/
|
|
342
|
-
rec: "rec/te_PP-
|
|
343
|
-
dict: "rec/te_PP-
|
|
344
|
-
cls: "cls/ch_ppocr_mobile_v2.
|
|
341
|
+
det: "det/multi_PP-OCRv3_det_mobile.onnx",
|
|
342
|
+
rec: "rec/te_PP-OCRv4_rec_mobile.onnx",
|
|
343
|
+
dict: "rec/te_PP-OCRv4_rec_mobile/te_dict.txt",
|
|
344
|
+
cls: "cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx"
|
|
345
345
|
}
|
|
346
346
|
];
|
|
347
347
|
export {
|
package/ocr.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../packages/ocr/ocr.ts", "../packages/ocr/ImageRaw.ts", "../packages/ocr/FileUtils.ts", "../packages/image/convert.ts", "../packages/image/extract.ts", "../packages/image/image-metadata.ts", "../packages/ocr/model-config.ts"],
|
|
4
|
-
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\r\nimport { getImageMetadata } from './image-metadata';\r\nexport async function imageExtract(\r\n buffer: Buffer,\r\n position: sharp.Region,\r\n padding: number = 0,\r\n) {\r\n let metadata =await getImageMetadata(buffer);\r\n let left = Math.min(\r\n Math.max(Math.round(position.left - padding), 0),\r\n metadata.width,\r\n );\r\n let top = Math.min(\r\n Math.max(Math.round(position.top - padding), 0),\r\n metadata.height,\r\n );\r\n return sharp(buffer)\r\n .extract({\r\n left,\r\n top,\r\n width: Math.min(\r\n Math.max(Math.round(position.width + padding * 2), 0),\r\n metadata.width - left,\r\n ),\r\n height: Math.min(\r\n Math.max(Math.round(position.height + padding * 2), 0),\r\n metadata.height - top,\r\n ),\r\n })\r\n .toBuffer();\r\n}\r\n", "import sharp from 'sharp';\r\n\r\nexport function getImageMetadata(buffer: Buffer) {\r\n let instance = sharp(buffer);\r\n return instance.metadata();\r\n}\r\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_infer.onnx',\n rec: 'rec/en_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/en_PP-OCRv4_rec_infer/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_infer/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_infer/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_infer/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_infer/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_infer/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/te_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/te_PP-OCRv4_rec_infer/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n];\n"],
|
|
4
|
+
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\nimport { getImageMetadata } from './image-metadata';\nexport async function imageExtract(\n buffer: Buffer,\n position: sharp.Region,\n padding: number = 0,\n) {\n let metadata =await getImageMetadata(buffer);\n let left = Math.min(\n Math.max(Math.round(position.left - padding), 0),\n metadata.width,\n );\n let top = Math.min(\n Math.max(Math.round(position.top - padding), 0),\n metadata.height,\n );\n return sharp(buffer)\n .extract({\n left,\n top,\n width: Math.min(\n Math.max(Math.round(position.width + padding * 2), 0),\n metadata.width - left,\n ),\n height: Math.min(\n Math.max(Math.round(position.height + padding * 2), 0),\n metadata.height - top,\n ),\n })\n .toBuffer();\n}\n", "import sharp from 'sharp';\n\nexport function getImageMetadata(buffer: Buffer) {\n let instance = sharp(buffer);\n return instance.metadata();\n}\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_mobile.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_server.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_mobile/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/en_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/en_PP-OCRv4_rec_mobile/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_mobile/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_mobile/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_mobile/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_mobile/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_mobile/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_mobile/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_mobile.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_mobile/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_mobile/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/multi_PP-OCRv3_det_mobile.onnx',\n rec: 'rec/te_PP-OCRv4_rec_mobile.onnx',\n dict: 'rec/te_PP-OCRv4_rec_mobile/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx',\n },\n];\n"],
|
|
5
5
|
"mappings": ";AAAA,OAAO;AAAA,EAEL;AAAA,OACK;AACP,SAAS,2BAA2B;;;ACJpC,OAAO,cAAc;AACrB,SAAS,oBAAoB;AAE7B,OAAO,WAAW;AACX,IAAM,WAAN,MAAM,kBAAiB,aAAa;AAAA,EACzC;AAAA,EAEA,aAAa,KAAKA,OAAiC;AAEjD,WAAO,IAAI,UAAS,MAAM,WAAWA,KAAW,CAAC;AAAA,EACnD;AAAA,EAEA,YAAY,cAA4B;AACtC,UAAM,YAAY;AAClB,SAAK,SAAS,QAAQ,YAAY;AAAA,EACpC;AAAA,EAEA,MAAM,MAAMA,OAAc;AACxB,UAAM,MAAM,SAAS,QAAQA,KAAI,EAAE,MAAM,CAAC;AAC1C,WAAO,KAAK,OAAO,SAAS,GAA6B,EAAE,OAAOA,KAAI;AAAA,EACxE;AAAA,EAEA,MAAM,OAAO,MAAkB;AAC7B,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,OAAO;AAAA,QACjB,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,KAAK;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,QAAQ,YAAyB;AACrC,UAAM,MAAM;AAAA,oBACI,KAAK,KAAK,aAAa,KAAK,MAAM;AAAA,UAC5C,WACC,IAAI,CAAC,cAAc;AAClB,YAAM,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,UAAU;AACnC,aAAO,oBAAoB,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;AAAA,IACjG,CAAC,EACA,KAAK,IAAI,CAAC;AAAA;AAAA;AAGjB,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,UAAU,CAAC,EAAE,OAAO,OAAO,KAAK,GAAG,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,OAAOC,QAAoB;AAC/B,SAAK,SAASA;AACd,UAAM,SAAS,MAAM,WAAWA,MAAK;AACrC,SAAK,OAAO,OAAO;AACnB,SAAK,QAAQ,OAAO;AACpB,SAAK,SAAS,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,eAAe,WAAWA,QAAoB;AAC5C,QAAM,SAAS,MAAMA,OAAM,IAAI,EAAE,SAAS,EAAE,mBAAmB,KAAK,CAAC;AACrE,SAAO;AAAA,IACL,MAAM,OAAO;AAAA,IACb,OAAO,OAAO,KAAK;AAAA,IACnB,QAAQ,OAAO,KAAK;AAAA,EACtB;AACF;AACA,SAAS,QAAQ,cAA4B;AAC3C,SAAO,MAAM,aAAa,MAAM;AAAA,IAC9B,KAAK;AAAA,MACH,OAAO,aAAa;AAAA,MACpB,QAAQ,aAAa;AAAA,MACrB,UAAU;AAAA,IACZ;AAAA,EACF,CAAC;AACH;;;AC1EA,OAAO,QAAQ;AACf,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACd,IAAM,YAAN,cAAwB,cAAc;AAAA,EAC3C,aAAsB,KAAKC,WAAkB;AAC3C,WAAO,MAAM,GAAG,SAAS,KAAK,UAAUA,SAAQ,GAAG,MAAM;AAAA,EAC3D;AACF;;;AFAA,SAAS,wBAAwB;AACjC,OAAOC,SAAQ;AACf,SAAS,QAAAC,aAAY;;;AGTrB,YAAY,OAAO;AACnB,YAAYC,SAAQ;AACpB,SAAS,QAAAC,aAAY;AACrB,SAAS,0BAA0B;AACnC,OAAOC,YAAW;AAClB,OAAO,gBAAgB;AACvB,SAAS,cAAc;AAEvB,IAAM,0BAA0B;AAEhC,IAAM,cAAgB,QAAM;AAAA,EACxB;AAAA,IACE,SAAO;AAAA,IACP,QAAM,CAAC,UAAU,wBAAwB,KAAK,KAAK,CAAC;AAAA,IACpD,YAAU,CAAC,WAAW;AACtB,YAAM,SAAS,OAAO,MAAM,uBAAuB;AACnD,aAAO,IAAI;AAAA,QACT,OAAO,KAAK,OAAO,MAAM,OAAO,CAAC,EAAE,MAAM,GAAG,QAAQ;AAAA,MACtD;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACE;AAAA,IACE,SAAO;AAAA,IACP;AAAA,MAAU,CAACC,cAER,aAASF,MAAK,UAAUE,SAAQ,CAAC,EACjC,KAAK,CAAC,WAAW,IAAI,WAAW,MAAM,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA,EACE,OAAO,SAAmB,CAAC,UAAU,iBAAiB,UAAU,CAAC;AACrE,CAAC;AACD,eAAsB,eAAe,OAA4B;AAC/D,QAAM,SAAS,MAAQ,QAAM,aAAa,KAAK;AAC/C,SAAO;AACT;AAKA,eAAsB,aAAa,OAA4B;AAC7D,QAAM,SAAS,MAAM,eAAe,KAAK;AACzC,QAAM,OAAO,MAAM,mBAAmB,MAAM;AAC5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,UAAU;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,aAAa;AAC7B,UAAM,OAAO,OAAO,OAAO,KAAK,MAAM,CAAC;AACvC,UAAM,iBAAiB,KAAK;AAE5B,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;AACjD,YAAM,QAAQ,eAAe,CAAC;AAC9B,YAAM,OAAO,eAAe,IAAI,CAAC;AACjC,YAAM,QAAQ,eAAe,IAAI,CAAC;AAClC,YAAM,MAAM,eAAe,IAAI,CAAC;AAChC,qBAAe,CAAC,IAAI;AACpB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAK,KAAa,gBAAgB,QAAQ;AAAA,IAChE;AACA,UAAM,SAASD,OAAM,gBAAgB;AAAA,MACnC,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC,EAAE,YAAY,CAAC;AAChB,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,WAAW,MAAM,SAAS,gBAAgB,MAAM,SAAS,cAAc;AACrE,UAAM,OAAO,MAAM,WAAW;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,UAAM,SAASA,OAAM,KAAK,MAAM;AAAA,MAC9B,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC;AACD,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,OAAO;AACL,UAAM,SAASA,OAAM,MAAM;AAC3B,WAAO,EAAE,MAAM,KAAK,MAAM,KAAK,OAAO;AAAA,EACxC;AACF;;;ACnFA,OAAOE,YAAW;;;ACAlB,OAAOC,YAAW;;;ALWlB,YAAYC,QAAO;AACnB,IAAM,oBAAsB,UAAO;AAAA,EACjC,SAAW;AAAA,IACP;AAAA,MACE,SAAM;AAAA,QACJ;AAAA,UACE,UAAO;AAAA,UACP,aAAU,CAAC,UAAU;AAAA,YACrB,KAAK;AAAA,YACL,MAAM;AAAA,YACN,OAAO;AAAA,YACP,QAAQ;AAAA,UACV,EAAE;AAAA,QACJ;AAAA,QACE,UAAO;AAAA,UACP,MAAQ,UAAO;AAAA,UACf,OAAS,UAAO;AAAA,UAChB,KAAO,UAAO;AAAA,UACd,QAAU,UAAO;AAAA,QACnB,CAAC;AAAA,MACH,CAAC;AAAA,MACD,EAAE,KAAK,IAAI,OAAO,IAAI,MAAM,IAAI,QAAQ,GAAG;AAAA,IAC7C;AAAA,EACF;AAAA,EACA,YAAc,YAAW,SAAM,CAAG,QAAO,UAAO,CAAC,CAAC,CAAC,GAAG,IAAI;AAAA;AAE5D,CAAC;AAED,gBAAgB;AAAA,EACd;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AACjB,CAAC;AAED,eAAe,QAEb,OACA,UAA2B,CAAC,GAC5B;AACA,QAAM,iBAAmB,SAAM,mBAAmB,OAAO;AAGzD,MAAI,EAAE,IAAI,IAAI,MAAM,aAAa,KAAK;AACtC,QAAM,WAAW,MAAM,IAAI,SAAS;AACpC,QAAM,UAAU,KAAK,IAAI,SAAS,OAAQ,SAAS,MAAO;AAC1D,MAAI,UAAU,eAAe,YAAY;AACvC,QAAI,QAAQ,SAAS,QAAS,SAAS;AACvC,YAAQ,QAAQ,IAAI,IAAI,QAAQ;AAChC,UAAM,IAAI,OAAO;AAAA,MACf,OAAO,KAAK;AAAA,QACV,YAAY,SAAS,QACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,MACA,QAAQ,KAAK;AAAA,QACX,YAAY,SAAS,SACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,IACF,CAAC;AAAA,EACH;AACA,QAAM,IAAI,OAAO,EAAE,GAAG,eAAe,SAAS,YAAY,OAAO,CAAC;AAClE,QAAM,IAAI,YAAY,CAAC;AACvB,SAAO,KAAK,OAAO,GAAU;AAC/B;AACO,IAAM,MAAN,cAAkB,QAAQ;AAAA,EAC/B,aAAsB,OAAO,UAA8B,CAAC,GAAG;AAC7D,UAAM,MAAM,MAAM,QAAQ,OAAO,OAAO;AACxC,QAAI,QAAQ,gBAAgB;AAC1B,YAAMC,IAAG,MAAMC,MAAK,UAAU,QAAQ,cAAc,GAAG;AAAA,QACrD,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,IAAC,IAAY,UAAU,QAAQ,KAAK,GAAG;AACvC,WAAO;AAAA,EACT;AACF;;;AMzFO,IAAM,cAAc;AAAA,EACzB;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AACF;",
|
|
6
6
|
"names": ["path", "sharp", "filePath", "fs", "path", "fs", "path", "sharp", "filePath", "sharp", "sharp", "v", "fs", "path"]
|
|
7
7
|
}
|
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shenghuabi/knowledge",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.23",
|
|
4
4
|
"description": "知识库",
|
|
5
5
|
"author": "wszgrcy",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"peerDependencies": {
|
|
8
|
-
"@cyia/vfs2": "^1.
|
|
8
|
+
"@cyia/vfs2": "^1.5.2",
|
|
9
9
|
"handlebars": "^4.7.8",
|
|
10
10
|
"lru-cache": "^11.2.1",
|
|
11
11
|
"rfdc": "^1.4.1",
|
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
"html-to-text": "^9.0.5",
|
|
20
20
|
"fastq": "^1.19.1",
|
|
21
21
|
"sharp": "0.34.2",
|
|
22
|
-
"@cyia/dl": "^1.
|
|
23
|
-
"@cyia/external-call": "^1.
|
|
22
|
+
"@cyia/dl": "^1.5.2",
|
|
23
|
+
"@cyia/external-call": "^1.5.2"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"html-entities": "^2.6.0",
|
package/worker/ocr/index.d.ts
CHANGED
package/worker/ocr.mjs
CHANGED
|
@@ -9,7 +9,7 @@ var BaseUrl = "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/
|
|
|
9
9
|
var DictUrl = "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4";
|
|
10
10
|
async function init(ocrConfig) {
|
|
11
11
|
const messageCb = (message) => {
|
|
12
|
-
ocrConfig.port
|
|
12
|
+
ocrConfig.port?.postMessage({ type: "progress", message });
|
|
13
13
|
};
|
|
14
14
|
const modelConfig = ModelConfig.find((item) => item.key === ocrConfig.key);
|
|
15
15
|
const fs = createNormalizeVfs({ dir: ocrConfig.modelDir });
|
package/worker/ocr.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/ocr/index.ts"],
|
|
4
|
-
"sourcesContent": ["import { ImageAdjustType, Ocr, ModelConfig } from '@shenghuabi/knowledge/ocr';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\n// import * as ort from 'onnxruntime-node';\nimport { MessagePort } from 'worker_threads';\nimport { getUniqueObjectKey } from '@shenghuabi/knowledge/util';\nimport { downloadFile } from '@cyia/dl';\nlet key!: string;\nlet ocrInstance: ReturnType<(typeof Ocr)['create']>;\nconst BaseUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv4';\n\nconst DictUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4';\nasync function init(ocrConfig: {\n key: string;\n modelDir: string;\n port
|
|
5
|
-
"mappings": ";AAAA,SAA0B,KAAK,mBAAmB;AAClD,SAAS,oBAAoB,YAAY;AAGzC,SAAS,0BAA0B;AACnC,SAAS,oBAAoB;AAC7B,IAAI;AACJ,IAAI;AACJ,IAAM,UACJ;AAEF,IAAM,UACJ;AACF,eAAe,KAAK,WAIjB;AACD,QAAM,YAAY,CAAC,YAAiB;AAClC,cAAU,
|
|
4
|
+
"sourcesContent": ["import { ImageAdjustType, Ocr, ModelConfig } from '@shenghuabi/knowledge/ocr';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\n// import * as ort from 'onnxruntime-node';\nimport { MessagePort } from 'worker_threads';\nimport { getUniqueObjectKey } from '@shenghuabi/knowledge/util';\nimport { downloadFile } from '@cyia/dl';\nlet key!: string;\nlet ocrInstance: ReturnType<(typeof Ocr)['create']>;\nconst BaseUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/onnx/PP-OCRv4';\n\nconst DictUrl =\n 'https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/master/paddle/PP-OCRv4';\nasync function init(ocrConfig: {\n key: string;\n modelDir: string;\n port?: MessagePort;\n}) {\n const messageCb = (message: any) => {\n ocrConfig.port?.postMessage({ type: 'progress', message });\n };\n const modelConfig = ModelConfig.find((item) => item.key === ocrConfig.key)!;\n const fs = createNormalizeVfs({ dir: ocrConfig.modelDir });\n // 自动下载模型\n const absDetectionPath = path.join(ocrConfig.modelDir, modelConfig.det);\n if (!(await fs.exists(modelConfig.det))) {\n await downloadFile(`${BaseUrl}/${modelConfig.det}`, {\n savePath: absDetectionPath,\n message: messageCb,\n });\n }\n const absRecognitionPath = path.join(ocrConfig.modelDir, modelConfig.rec);\n if (!(await fs.exists(modelConfig.rec))) {\n await downloadFile(`${BaseUrl}/${modelConfig.rec}`, {\n savePath: absRecognitionPath,\n message: messageCb,\n });\n }\n const absDictionaryPath = path.join(ocrConfig.modelDir, modelConfig.dict);\n if (!(await fs.exists(modelConfig.dict))) {\n await downloadFile(`${DictUrl}/${modelConfig.dict}`, {\n savePath: absDictionaryPath,\n message: messageCb,\n });\n }\n}\n// 改为init和convert\nasync function convert(input: {\n filePath: string | Uint8Array;\n ocrConfig: { key: string; modelDir: string; device?: 'dml' | 'cuda' | 'cpu' };\n options?: ImageAdjustType;\n}) {\n const inputKey = getUniqueObjectKey(input.ocrConfig);\n if (key !== inputKey) {\n const modelConfig = ModelConfig.find(\n (item) => item.key === input.ocrConfig.key,\n )!;\n // 自动下载模型\n const absDetectionPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.det,\n );\n const absRecognitionPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.rec,\n );\n const absDictionaryPath = path.join(\n input.ocrConfig.modelDir,\n modelConfig.dict,\n );\n ocrInstance = Ocr.create({\n onnxOptions: {\n executionProviders: input.ocrConfig.device\n ? [input.ocrConfig.device]\n : ['dml', 'cuda', 'cpu'],\n executionMode: 'parallel',\n },\n models: {\n detectionPath: absDetectionPath,\n recognitionPath: absRecognitionPath,\n dictionaryPath: absDictionaryPath,\n },\n });\n key = inputKey;\n }\n return (await ocrInstance).convert(input.filePath, input.options);\n}\n\nexport { init, convert };\n"],
|
|
5
|
+
"mappings": ";AAAA,SAA0B,KAAK,mBAAmB;AAClD,SAAS,oBAAoB,YAAY;AAGzC,SAAS,0BAA0B;AACnC,SAAS,oBAAoB;AAC7B,IAAI;AACJ,IAAI;AACJ,IAAM,UACJ;AAEF,IAAM,UACJ;AACF,eAAe,KAAK,WAIjB;AACD,QAAM,YAAY,CAAC,YAAiB;AAClC,cAAU,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,EAC3D;AACA,QAAM,cAAc,YAAY,KAAK,CAAC,SAAS,KAAK,QAAQ,UAAU,GAAG;AACzE,QAAM,KAAK,mBAAmB,EAAE,KAAK,UAAU,SAAS,CAAC;AAEzD,QAAM,mBAAmB,KAAK,KAAK,UAAU,UAAU,YAAY,GAAG;AACtE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,GAAG,GAAI;AACvC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,GAAG,IAAI;AAAA,MAClD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACA,QAAM,qBAAqB,KAAK,KAAK,UAAU,UAAU,YAAY,GAAG;AACxE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,GAAG,GAAI;AACvC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,GAAG,IAAI;AAAA,MAClD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACA,QAAM,oBAAoB,KAAK,KAAK,UAAU,UAAU,YAAY,IAAI;AACxE,MAAI,CAAE,MAAM,GAAG,OAAO,YAAY,IAAI,GAAI;AACxC,UAAM,aAAa,GAAG,OAAO,IAAI,YAAY,IAAI,IAAI;AAAA,MACnD,UAAU;AAAA,MACV,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,eAAe,QAAQ,OAIpB;AACD,QAAM,WAAW,mBAAmB,MAAM,SAAS;AACnD,MAAI,QAAQ,UAAU;AACpB,UAAM,cAAc,YAAY;AAAA,MAC9B,CAAC,SAAS,KAAK,QAAQ,MAAM,UAAU;AAAA,IACzC;AAEA,UAAM,mBAAmB,KAAK;AAAA,MAC5B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,UAAM,qBAAqB,KAAK;AAAA,MAC9B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,UAAM,oBAAoB,KAAK;AAAA,MAC7B,MAAM,UAAU;AAAA,MAChB,YAAY;AAAA,IACd;AACA,kBAAc,IAAI,OAAO;AAAA,MACvB,aAAa;AAAA,QACX,oBAAoB,MAAM,UAAU,SAChC,CAAC,MAAM,UAAU,MAAM,IACvB,CAAC,OAAO,QAAQ,KAAK;AAAA,QACzB,eAAe;AAAA,MACjB;AAAA,MACA,QAAQ;AAAA,QACN,eAAe;AAAA,QACf,iBAAiB;AAAA,QACjB,gBAAgB;AAAA,MAClB;AAAA,IACF,CAAC;AACD,UAAM;AAAA,EACR;AACA,UAAQ,MAAM,aAAa,QAAQ,MAAM,UAAU,MAAM,OAAO;AAClE;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|