@shenghuabi/knowledge 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/embedding/embedding.service.d.ts +4 -0
  2. package/embedding/index.d.ts +2 -0
  3. package/embedding/type.d.ts +12 -0
  4. package/embedding.mjs +61 -0
  5. package/embedding.mjs.map +7 -0
  6. package/file-parser/const.d.ts +20 -0
  7. package/file-parser/dict/dict-format/dsl/dsl-parse.service.d.ts +6 -0
  8. package/file-parser/dict/dict-format/dsl/dsl.format.d.ts +1 -0
  9. package/file-parser/dict/dict-format/mdict-parse.service.d.ts +20 -0
  10. package/file-parser/dict/dict-format/stardict-parse.service.d.ts +12 -0
  11. package/file-parser/dict/dict-format/yaml-parse.service.d.ts +25 -0
  12. package/file-parser/dict/dict.service.d.ts +10 -0
  13. package/file-parser/dict/index.d.ts +2 -0
  14. package/file-parser/dict/type.d.ts +24 -0
  15. package/file-parser/document-file-parser.service.d.ts +15 -0
  16. package/file-parser/document-loader/pdf-img.loader.d.ts +8 -0
  17. package/file-parser/document-loader/xlsx.loader.d.ts +6 -0
  18. package/file-parser/file-parser.service.d.ts +13 -0
  19. package/file-parser/index.d.ts +6 -0
  20. package/file-parser/text-analyse.d.ts +1 -0
  21. package/file-parser/text-parser.d.ts +3 -0
  22. package/file-parser/vl-parser/markdown.parser.d.ts +8 -0
  23. package/file-parser.mjs +850 -0
  24. package/file-parser.mjs.map +7 -0
  25. package/image/convert.d.ts +25 -0
  26. package/image/extract.d.ts +2 -0
  27. package/image/image-metadata.d.ts +2 -0
  28. package/image/index.d.ts +3 -0
  29. package/image.mjs +134 -0
  30. package/image.mjs.map +7 -0
  31. package/knowledge/article/article.knowledge.service.d.ts +53 -0
  32. package/knowledge/article/define/config.d.ts +60 -0
  33. package/knowledge/article/define/index.d.ts +2 -0
  34. package/knowledge/article/define/payload.d.ts +16 -0
  35. package/knowledge/article/index.d.ts +2 -0
  36. package/knowledge/common/common.knowledge.service.d.ts +240 -0
  37. package/knowledge/common/define/base.d.ts +7 -0
  38. package/knowledge/common/define/chunk.d.ts +14 -0
  39. package/knowledge/common/define/embedding.d.ts +5 -0
  40. package/knowledge/common/define/index.d.ts +3 -0
  41. package/knowledge/common/index.d.ts +1 -0
  42. package/knowledge/common/query.d.ts +7 -0
  43. package/knowledge/const.d.ts +95 -0
  44. package/knowledge/define/index.d.ts +245 -0
  45. package/knowledge/dict/define/config.d.ts +68 -0
  46. package/knowledge/dict/define/index.d.ts +1 -0
  47. package/knowledge/dict/dict.knowledge.service.d.ts +67 -0
  48. package/knowledge/graph/const.d.ts +20 -0
  49. package/knowledge/graph/define/config.d.ts +169 -0
  50. package/knowledge/graph/define/define.d.ts +402 -0
  51. package/knowledge/graph/define/index.d.ts +2 -0
  52. package/knowledge/graph/define/query.d.ts +14 -0
  53. package/knowledge/graph/graph.handle.service.d.ts +28 -0
  54. package/knowledge/graph/graph.knowledge.service.d.ts +40 -0
  55. package/knowledge/graph/graph.local.service.d.ts +85 -0
  56. package/knowledge/graph/graph.query.service.d.ts +160 -0
  57. package/knowledge/graph/graph.service.d.ts +24 -0
  58. package/knowledge/graph/graph.util.service.d.ts +31 -0
  59. package/knowledge/graph/type.d.ts +11 -0
  60. package/knowledge/graph/util/format-attr.d.ts +48 -0
  61. package/knowledge/graph/util/graph-util.d.ts +5 -0
  62. package/knowledge/graph/util.d.ts +1 -0
  63. package/knowledge/graph/vecotr-format.d.ts +11 -0
  64. package/knowledge/index.d.ts +17 -0
  65. package/knowledge/knowledge.manager.service.d.ts +42 -0
  66. package/knowledge/knowledge.util.service.d.ts +21 -0
  67. package/knowledge/normal/define/config.d.ts +60 -0
  68. package/knowledge/normal/define/index.d.ts +1 -0
  69. package/knowledge/normal/normal.knowledge.service.d.ts +49 -0
  70. package/knowledge/template.format.d.ts +6 -0
  71. package/knowledge/type.d.ts +28 -0
  72. package/knowledge.mjs +2856 -0
  73. package/knowledge.mjs.map +7 -0
  74. package/ocr/FileUtils.d.ts +4 -0
  75. package/ocr/ImageRaw.d.ts +11 -0
  76. package/ocr/index.d.ts +2 -0
  77. package/ocr/model-config.d.ts +8 -0
  78. package/ocr/ocr.d.ts +29 -0
  79. package/ocr.mjs +351 -0
  80. package/ocr.mjs.map +7 -0
  81. package/package.json +105 -0
  82. package/qdrant/index.d.ts +3 -0
  83. package/qdrant/qdrant-client.service.d.ts +396 -0
  84. package/qdrant/qdrant-server.service.d.ts +21 -0
  85. package/qdrant/type.d.ts +18 -0
  86. package/qdrant/util.d.ts +1 -0
  87. package/qdrant.mjs +274 -0
  88. package/qdrant.mjs.map +7 -0
  89. package/util/batch-queue.d.ts +6 -0
  90. package/util/cache-queue.d.ts +10 -0
  91. package/util/clone.d.ts +1 -0
  92. package/util/embedding-queue.d.ts +3 -0
  93. package/util/get-hash.d.ts +2 -0
  94. package/util/html-to-text/index.d.ts +5 -0
  95. package/util/index.d.ts +10 -0
  96. package/util/is-truthy.d.ts +1 -0
  97. package/util/log.service.d.ts +6 -0
  98. package/util/promise.d.ts +5 -0
  99. package/util/type.d.ts +1 -0
  100. package/util/uniq-object-key.d.ts +1 -0
  101. package/util.mjs +219 -0
  102. package/util.mjs.map +7 -0
  103. package/worker/custom-cache.d.ts +28 -0
  104. package/worker/ocr/index.d.ts +17 -0
  105. package/worker/ocr.mjs +75 -0
  106. package/worker/ocr.mjs.map +7 -0
  107. package/worker/reranker.mjs +180 -0
  108. package/worker/reranker.mjs.map +7 -0
  109. package/worker/set-transformers-config.d.ts +19 -0
  110. package/worker/text2vec/index.d.ts +9 -0
  111. package/worker/text2vec.mjs +194 -0
  112. package/worker/text2vec.mjs.map +7 -0
package/ocr.mjs ADDED
@@ -0,0 +1,351 @@
1
+ // packages/ocr/ocr.ts
2
+ import BaseOcr, {
3
+ registerBackend
4
+ } from "@gutenye/ocr-common";
5
+ import { splitIntoLineImages } from "@gutenye/ocr-common/splitIntoLineImages";
6
+
7
+ // packages/ocr/ImageRaw.ts
8
+ import filePath from "node:path";
9
+ import { ImageRawBase } from "@gutenye/ocr-common";
10
+ import sharp from "sharp";
11
/**
 * sharp-backed image container registered as the OCR backend's `ImageRaw`.
 *
 * The instance keeps two views of the same picture in sync: the raw pixel
 * fields inherited from `ImageRawBase` (`data`, `width`, `height`) and a
 * private sharp pipeline used for resizing, drawing and writing.
 */
var ImageRaw = class _ImageRaw extends ImageRawBase {
  /** sharp pipeline describing the current image. */
  #sharp;

  /**
   * Creates an ImageRaw from an image source.
   *
   * @param path4 A sharp pipeline (used as-is), or a file path / encoded image
   *   buffer, which is opened with sharp first.
   * @returns A new ImageRaw holding the decoded raw pixels.
   */
  static async open(path4) {
    // `toImageRaw` calls `.raw()` on its argument, so plain paths and encoded
    // buffers must be turned into a sharp pipeline first. An alpha channel is
    // forced because `toSharp` always declares 4 channels for the raw data.
    // NOTE(review): `convert` passes a sharp pipeline to `detect`, which
    // presumably forwards it here - that case is passed through untouched.
    const needsDecoding = typeof path4 === "string" || path4 instanceof Uint8Array;
    const pipeline = needsDecoding ? sharp(path4).ensureAlpha() : path4;
    return new _ImageRaw(await toImageRaw(pipeline));
  }

  /**
   * @param imageRawData Raw pixel data with `data`, `width` and `height`.
   */
  constructor(imageRawData) {
    super(imageRawData);
    this.#sharp = toSharp(imageRawData);
  }

  /**
   * Writes the image to disk; the output format is taken from the file
   * extension of `path4` (without the leading dot).
   */
  async write(path4) {
    const ext = filePath.extname(path4).slice(1);
    return this.#sharp.toFormat(ext).toFile(path4);
  }

  /**
   * Resizes the image to `size.width` x `size.height` using sharp's
   * "contain" fit and refreshes the raw pixel fields.
   */
  async resize(size) {
    return this.#apply(
      this.#sharp.resize({
        width: size.width,
        height: size.height,
        fit: "contain"
      })
    );
  }

  /**
   * Overlays one red, unfilled polygon per entry of `lineImages` (using the
   * four points of each `box`) and refreshes the raw pixel fields.
   */
  async drawBox(lineImages) {
    const svg = `
 <svg width="${this.width}" height="${this.height}">
 ${lineImages.map((lineImage) => {
      const [p1, p2, p3, p4] = lineImage.box;
      return `<polygon points="${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}" fill="none" stroke="red" />`;
    }).join("\n")}
 </svg>
 `;
    return this.#apply(
      this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }])
    );
  }

  /**
   * Stores the new pipeline, renders it to raw pixels and copies the result
   * into `data`, `width` and `height`.
   */
  async #apply(sharp5) {
    this.#sharp = sharp5;
    const result = await toImageRaw(sharp5);
    this.data = result.data;
    this.width = result.width;
    this.height = result.height;
    return this;
  }
};
55
+ async function toImageRaw(sharp5) {
56
+ const result = await sharp5.raw().toBuffer({ resolveWithObject: true });
57
+ return {
58
+ data: result.data,
59
+ width: result.info.width,
60
+ height: result.info.height
61
+ };
62
+ }
63
/**
 * Wraps raw pixel data in a sharp pipeline.
 *
 * The data is always declared as 4-channel, so callers must supply pixels
 * that include an alpha channel.
 *
 * @param imageRawData Object with `data`, `width` and `height`.
 */
function toSharp(imageRawData) {
  const { data, width, height } = imageRawData;
  const rawDescription = { width, height, channels: 4 };
  return sharp(data, { raw: rawDescription });
}
72
+
73
+ // packages/ocr/FileUtils.ts
74
+ import fs from "node:fs/promises";
75
+ import { FileUtilsBase } from "@gutenye/ocr-common";
76
+ import { path } from "@cyia/vfs2";
77
/**
 * File helper registered as the OCR backend's `FileUtils`.
 */
var FileUtils = class extends FileUtilsBase {
  /**
   * Reads a file as UTF-8 text after normalizing its path.
   *
   * @param filePath2 Path of the file to read.
   * @returns The file contents as a string.
   */
  static async read(filePath2) {
    const normalizedPath = path.normalize(filePath2);
    const contents = await fs.readFile(normalizedPath, "utf8");
    return contents;
  }
};
82
+
83
+ // packages/ocr/ocr.ts
84
+ import { InferenceSession } from "onnxruntime-node";
85
+ import fs3 from "fs/promises";
86
+ import { path as path3 } from "@cyia/vfs2";
87
+
88
+ // packages/image/convert.ts
89
+ import * as v from "valibot";
90
+ import * as fs2 from "fs/promises";
91
+ import { path as path2 } from "@cyia/vfs2";
92
+ import { fileTypeFromBuffer } from "file-type";
93
+ import sharp2 from "sharp";
94
+ import heicdecode from "heic-decode";
95
+ import { decode } from "bmp-js";
96
+ var BASE64_HEAD_REPLACE_REG = /^data:image\/[\w]+;base64,/;
97
// Schema for the accepted image inputs. Three alternatives are listed:
//   1. a string starting with a base64 image data-URL header -> decoded bytes
//   2. any other string, treated as a file path -> Promise of the file's bytes
//   3. a Uint8Array, accepted as-is
// Note that the file-path alternative yields a Promise, so the parsed value
// has to be awaited by the caller.
var InputDefine = v.union([
  v.pipe(
    v.string(),
    v.check((candidate) => BASE64_HEAD_REPLACE_REG.test(candidate)),
    v.transform((dataUrl) => {
      // Strip the "data:image/...;base64," header and decode the remainder.
      const [header] = dataUrl.match(BASE64_HEAD_REPLACE_REG);
      const payload = dataUrl.slice(header.length);
      return new Uint8Array(Buffer.from(payload, "base64"));
    })
  ),
  v.pipe(
    v.string(),
    v.transform((filePath2) => {
      const pendingRead = fs2.readFile(path2.normalize(filePath2));
      return pendingRead.then((contents) => new Uint8Array(contents));
    })
  ),
  v.pipe(v.custom((candidate) => candidate instanceof Uint8Array))
]);
116
/**
 * Resolves an image input to its bytes by parsing it with `InputDefine`.
 *
 * @param input Value to validate against `InputDefine`.
 * @returns The awaited parse result (the file-path alternative of the schema
 *   produces a Promise, which is resolved here).
 */
async function decodeToBuffer(input) {
  return await v.parse(InputDefine, input);
}
120
/**
 * Turns an image input into a sharp pipeline.
 *
 * BMP and HEIC/HEIF inputs are decoded by dedicated decoders and wrapped as
 * 4-channel raw data (reported as "image/png"); every other detected type is
 * handed to sharp directly and reported with its detected mime type.
 *
 * @param input Value accepted by `decodeToBuffer`.
 * @returns `{ type, raw }` where `raw` is the sharp pipeline.
 * @throws Error when the file type cannot be detected from the bytes.
 */
async function convertToRaw(input) {
  const bytes = await decodeToBuffer(input);
  const detected = await fileTypeFromBuffer(bytes);
  if (!detected) {
    throw new Error(`不支持的图片类型`);
  }
  const { mime } = detected;

  if (mime === "image/bmp") {
    const bitmap = decode(Buffer.from(bytes));
    const pixels = bitmap.data;
    // Reorder every 4-byte pixel in place: the decoder's alpha, blue, green,
    // red layout becomes red, green, blue, alpha.
    for (let offset = 0; offset < pixels.length; offset += 4) {
      const alpha = pixels[offset];
      const blue = pixels[offset + 1];
      const green = pixels[offset + 2];
      const red = pixels[offset + 3];
      pixels[offset] = red;
      pixels[offset + 1] = green;
      pixels[offset + 2] = blue;
      // Bitmaps without an alpha channel are made fully opaque.
      pixels[offset + 3] = bitmap.is_with_alpha ? alpha : 255;
    }
    const bmpPipeline = sharp2(pixels, {
      raw: {
        width: bitmap.width,
        height: bitmap.height,
        channels: 4
      }
    }).ensureAlpha(1);
    return { type: "image/png", raw: bmpPipeline };
  }

  if (mime === "image/heic" || mime === "image/heif") {
    const decoded = await heicdecode({
      buffer: bytes
    });
    const heicPipeline = sharp2(decoded.data, {
      raw: {
        width: decoded.width,
        height: decoded.height,
        channels: 4
      }
    });
    return { type: "image/png", raw: heicPipeline };
  }

  // Everything else is decoded by sharp itself.
  return { type: mime, raw: sharp2(bytes) };
}
164
+
165
+ // packages/image/extract.ts
166
+ import sharp4 from "sharp";
167
+
168
+ // packages/image/image-metadata.ts
169
+ import sharp3 from "sharp";
170
+
171
+ // packages/ocr/ocr.ts
172
+ import * as v2 from "valibot";
173
// Options schema for `convert`.
//   padding    - either one number applied to all four sides, or an object
//                with explicit left/right/top/bottom values; defaults to 50
//                on every side.
//   maxSideLen - a number, defaulting to 1920.
var ImageAdjustDefine = v2.object({
  padding: v2.pipe(
    v2.optional(
      v2.union([
        v2.pipe(
          v2.number(),
          // Expand a single number into a per-side padding object.
          v2.transform((uniform) => {
            return {
              top: uniform,
              left: uniform,
              right: uniform,
              bottom: uniform
            };
          })
        ),
        v2.object({
          left: v2.number(),
          right: v2.number(),
          top: v2.number(),
          bottom: v2.number()
        })
      ]),
      { top: 50, right: 50, left: 50, bottom: 50 }
    )
  ),
  maxSideLen: v2.optional(v2.union([v2.pipe(v2.number())]), 1920)
  // Disabled option, kept for reference:
  // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),
});
199
// Register this module's implementations with the OCR library at load time.
// No default models are provided.
registerBackend({
  FileUtils: FileUtils,
  ImageRaw: ImageRaw,
  InferenceSession: InferenceSession,
  splitIntoLineImages: splitIntoLineImages,
  defaultModels: undefined
});
206
/**
 * Prepares an image and runs detection on it.
 *
 * Steps: validate `options`, load the image as a sharp pipeline, shrink it
 * when its longer side exceeds `maxSideLen` (aspect ratio preserved), add a
 * white border of `padding` pixels, force an alpha channel, then call
 * `this.detect` with the pipeline.
 *
 * Must be called with `this` set to the OCR instance; `Ocr.create` binds it.
 *
 * @param input Image source accepted by `convertToRaw`.
 * @param options Optional `{ padding, maxSideLen }`, validated by `ImageAdjustDefine`.
 * @returns Whatever `this.detect` returns.
 */
async function convert(input, options = {}) {
  const resolveOptions = v2.parse(ImageAdjustDefine, options);
  let { raw } = await convertToRaw(input);
  const metadata = await raw.metadata();
  const maxSize = Math.max(metadata.width, metadata.height);
  if (maxSize > resolveOptions.maxSideLen) {
    // Ratio of the shorter side to the longer side (always <= 1).
    let ratio = metadata.width / metadata.height;
    ratio = ratio > 1 ? 1 / ratio : ratio;
    const longSide = Math.round(resolveOptions.maxSideLen);
    const shortSide = Math.round(ratio * resolveOptions.maxSideLen);
    // Very elongated images can round down to 0 pixels, which the resize
    // call rejects; keep every dimension at least 1 pixel.
    raw = raw.resize({
      width: Math.max(1, maxSize === metadata.width ? longSide : shortSide),
      height: Math.max(1, maxSize === metadata.height ? longSide : shortSide)
    });
  }
  raw = raw.extend({ ...resolveOptions.padding, background: "#fff" });
  raw = raw.ensureAlpha(1);
  return this.detect(raw);
}
227
/**
 * OCR entry point: a `BaseOcr` whose created instances additionally expose a
 * bound `convert` method.
 */
var Ocr = class extends BaseOcr {
  /**
   * Creates the OCR instance via `BaseOcr.create`, ensures the debug output
   * directory exists when one is configured, and attaches `convert`.
   *
   * @param options Creation options forwarded to `BaseOcr.create`;
   *   `debugOutputDir`, when set, is created recursively.
   * @returns The created instance with a `convert` method bound to it.
   */
  static async create(options = {}) {
    const instance = await BaseOcr.create(options);
    const { debugOutputDir } = options;
    if (debugOutputDir) {
      const debugDir = path3.normalize(debugOutputDir);
      await fs3.mkdir(debugDir, { recursive: true });
    }
    instance.convert = convert.bind(instance);
    return instance;
  }
};
239
+
240
+ // packages/ocr/model-config.ts
241
+ var ModelConfig = [
242
+ {
243
+ label: "简体中文",
244
+ key: "ch_mobile",
245
+ det: "det/ch_PP-OCRv4_det_infer.onnx",
246
+ rec: "rec/ch_PP-OCRv4_rec_infer.onnx",
247
+ dict: "rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt",
248
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
249
+ },
250
+ {
251
+ label: "简体中文(服务器)",
252
+ key: "ch_server",
253
+ det: "det/ch_PP-OCRv4_det_server_infer.onnx",
254
+ rec: "rec/ch_PP-OCRv4_rec_server_infer.onnx",
255
+ dict: "rec/ch_PP-OCRv4_rec_server_infer/ppocr_keys_v1.txt",
256
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
257
+ },
258
+ {
259
+ label: "繁體中文",
260
+ key: "chinese_cht",
261
+ det: "det/ch_PP-OCRv4_det_infer.onnx",
262
+ rec: "rec/chinese_cht_PP-OCRv3_rec_infer.onnx",
263
+ dict: "rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt",
264
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
265
+ },
266
+ {
267
+ label: "英文",
268
+ key: "en_mobile",
269
+ det: "det/en_PP-OCRv3_det_infer.onnx",
270
+ rec: "rec/en_PP-OCRv4_rec_infer.onnx",
271
+ dict: "rec/en_PP-OCRv4_rec_infer/en_dict.txt",
272
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
273
+ },
274
+ {
275
+ label: "阿拉伯文",
276
+ key: "ar_mobile",
277
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
278
+ rec: "rec/arabic_PP-OCRv4_rec_infer.onnx",
279
+ dict: "rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt",
280
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
281
+ },
282
+ {
283
+ label: "塞尔维亚文",
284
+ key: "cyrillic_mobile",
285
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
286
+ rec: "rec/cyrillic_PP-OCRv3_rec_infer.onnx",
287
+ dict: "rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt",
288
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
289
+ },
290
+ {
291
+ label: "梵文",
292
+ key: "devanagari_mobile",
293
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
294
+ rec: "rec/devanagari_PP-OCRv4_rec_infer.onnx",
295
+ dict: "rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt",
296
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
297
+ },
298
+ {
299
+ label: "日文",
300
+ key: "japan_mobile",
301
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
302
+ rec: "rec/japan_PP-OCRv4_rec_infer.onnx",
303
+ dict: "rec/japan_PP-OCRv4_rec_infer/japan_dict.txt",
304
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
305
+ },
306
+ {
307
+ label: "卡纳达语",
308
+ key: "ka_mobile",
309
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
310
+ rec: "rec/ka_PP-OCRv4_rec_infer.onnx",
311
+ dict: "rec/ka_PP-OCRv4_rec_infer/ka_dict.txt",
312
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
313
+ },
314
+ {
315
+ label: "韩文",
316
+ key: "korean_mobile",
317
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
318
+ rec: "rec/korean_PP-OCRv4_rec_infer.onnx",
319
+ dict: "rec/korean_PP-OCRv4_rec_infer/korean_dict.txt",
320
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
321
+ },
322
+ {
323
+ label: "拉丁文",
324
+ key: "latin_mobile",
325
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
326
+ rec: "rec/latin_PP-OCRv3_rec_infer.onnx",
327
+ dict: "rec/latin_PP-OCRv3_rec_infer/latin_dict.txt",
328
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
329
+ },
330
+ {
331
+ label: "泰米尔文",
332
+ key: "ta_mobile",
333
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
334
+ rec: "rec/ta_PP-OCRv4_rec_infer.onnx",
335
+ dict: "rec/ta_PP-OCRv4_rec_infer/ta_dict.txt",
336
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
337
+ },
338
+ {
339
+ label: "泰卢固文",
340
+ key: "te_mobile",
341
+ det: "det/Multilingual_PP-OCRv3_det_infer.onnx",
342
+ rec: "rec/te_PP-OCRv4_rec_infer.onnx",
343
+ dict: "rec/te_PP-OCRv4_rec_infer/te_dict.txt",
344
+ cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
345
+ }
346
+ ];
347
+ export {
348
+ ModelConfig,
349
+ Ocr
350
+ };
351
+ //# sourceMappingURL=ocr.mjs.map
package/ocr.mjs.map ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../packages/ocr/ocr.ts", "../packages/ocr/ImageRaw.ts", "../packages/ocr/FileUtils.ts", "../packages/image/convert.ts", "../packages/image/extract.ts", "../packages/image/image-metadata.ts", "../packages/ocr/model-config.ts"],
4
+ "sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? 
resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const 
result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n 
const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\r\nimport { getImageMetadata } from './image-metadata';\r\nexport async function imageExtract(\r\n buffer: Buffer,\r\n position: sharp.Region,\r\n padding: number = 0,\r\n) {\r\n let 
metadata =await getImageMetadata(buffer);\r\n let left = Math.min(\r\n Math.max(Math.round(position.left - padding), 0),\r\n metadata.width,\r\n );\r\n let top = Math.min(\r\n Math.max(Math.round(position.top - padding), 0),\r\n metadata.height,\r\n );\r\n return sharp(buffer)\r\n .extract({\r\n left,\r\n top,\r\n width: Math.min(\r\n Math.max(Math.round(position.width + padding * 2), 0),\r\n metadata.width - left,\r\n ),\r\n height: Math.min(\r\n Math.max(Math.round(position.height + padding * 2), 0),\r\n metadata.height - top,\r\n ),\r\n })\r\n .toBuffer();\r\n}\r\n", "import sharp from 'sharp';\r\n\r\nexport function getImageMetadata(buffer: Buffer) {\r\n let instance = sharp(buffer);\r\n return instance.metadata();\r\n}\r\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_infer.onnx',\n rec: 'rec/en_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/en_PP-OCRv4_rec_infer/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n 
label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_infer/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_infer/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_infer/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_infer/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_infer/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/te_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/te_PP-OCRv4_rec_infer/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n];\n"],
5
+ "mappings": ";AAAA,OAAO;AAAA,EAEL;AAAA,OACK;AACP,SAAS,2BAA2B;;;ACJpC,OAAO,cAAc;AACrB,SAAS,oBAAoB;AAE7B,OAAO,WAAW;AACX,IAAM,WAAN,MAAM,kBAAiB,aAAa;AAAA,EACzC;AAAA,EAEA,aAAa,KAAKA,OAAiC;AAEjD,WAAO,IAAI,UAAS,MAAM,WAAWA,KAAW,CAAC;AAAA,EACnD;AAAA,EAEA,YAAY,cAA4B;AACtC,UAAM,YAAY;AAClB,SAAK,SAAS,QAAQ,YAAY;AAAA,EACpC;AAAA,EAEA,MAAM,MAAMA,OAAc;AACxB,UAAM,MAAM,SAAS,QAAQA,KAAI,EAAE,MAAM,CAAC;AAC1C,WAAO,KAAK,OAAO,SAAS,GAA6B,EAAE,OAAOA,KAAI;AAAA,EACxE;AAAA,EAEA,MAAM,OAAO,MAAkB;AAC7B,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,OAAO;AAAA,QACjB,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,KAAK;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,QAAQ,YAAyB;AACrC,UAAM,MAAM;AAAA,oBACI,KAAK,KAAK,aAAa,KAAK,MAAM;AAAA,UAC5C,WACC,IAAI,CAAC,cAAc;AAClB,YAAM,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,UAAU;AACnC,aAAO,oBAAoB,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;AAAA,IACjG,CAAC,EACA,KAAK,IAAI,CAAC;AAAA;AAAA;AAGjB,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,UAAU,CAAC,EAAE,OAAO,OAAO,KAAK,GAAG,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,OAAOC,QAAoB;AAC/B,SAAK,SAASA;AACd,UAAM,SAAS,MAAM,WAAWA,MAAK;AACrC,SAAK,OAAO,OAAO;AACnB,SAAK,QAAQ,OAAO;AACpB,SAAK,SAAS,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,eAAe,WAAWA,QAAoB;AAC5C,QAAM,SAAS,MAAMA,OAAM,IAAI,EAAE,SAAS,EAAE,mBAAmB,KAAK,CAAC;AACrE,SAAO;AAAA,IACL,MAAM,OAAO;AAAA,IACb,OAAO,OAAO,KAAK;AAAA,IACnB,QAAQ,OAAO,KAAK;AAAA,EACtB;AACF;AACA,SAAS,QAAQ,cAA4B;AAC3C,SAAO,MAAM,aAAa,MAAM;AAAA,IAC9B,KAAK;AAAA,MACH,OAAO,aAAa;AAAA,MACpB,QAAQ,aAAa;AAAA,MACrB,UAAU;AAAA,IACZ;AAAA,EACF,CAAC;AACH;;;AC1EA,OAAO,QAAQ;AACf,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACd,IAAM,YAAN,cAAwB,cAAc;AAAA,EAC3C,aAAsB,KAAKC,WAAkB;AAC3C,WAAO,MAAM,GAAG,SAAS,KAAK,UAAUA,SAAQ,GAAG,MAAM;AAAA,EAC3D;AACF;;;AFAA,SAAS,wBAAwB;AACjC,OAAOC,SAAQ;AACf,SAAS,QAAAC,aAAY;;;AGTrB,YAAY,OAAO;AACnB,YAAYC,SAAQ;AACpB,SAAS,QAAAC,aAAY;AACrB,SAAS,0BAA0B;AACnC,OAAOC,YAAW;AAClB,OAAO,gBAAgB;AACvB,SAAS,cAAc;AAEvB,IAAM,0BAA0B;AAEhC,IAAM,cAAgB,QAAM;AA
AA,EACxB;AAAA,IACE,SAAO;AAAA,IACP,QAAM,CAAC,UAAU,wBAAwB,KAAK,KAAK,CAAC;AAAA,IACpD,YAAU,CAAC,WAAW;AACtB,YAAM,SAAS,OAAO,MAAM,uBAAuB;AACnD,aAAO,IAAI;AAAA,QACT,OAAO,KAAK,OAAO,MAAM,OAAO,CAAC,EAAE,MAAM,GAAG,QAAQ;AAAA,MACtD;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACE;AAAA,IACE,SAAO;AAAA,IACP;AAAA,MAAU,CAACC,cAER,aAASF,MAAK,UAAUE,SAAQ,CAAC,EACjC,KAAK,CAAC,WAAW,IAAI,WAAW,MAAM,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA,EACE,OAAO,SAAmB,CAAC,UAAU,iBAAiB,UAAU,CAAC;AACrE,CAAC;AACD,eAAsB,eAAe,OAA4B;AAC/D,QAAM,SAAS,MAAQ,QAAM,aAAa,KAAK;AAC/C,SAAO;AACT;AAKA,eAAsB,aAAa,OAA4B;AAC7D,QAAM,SAAS,MAAM,eAAe,KAAK;AACzC,QAAM,OAAO,MAAM,mBAAmB,MAAM;AAC5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,UAAU;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,aAAa;AAC7B,UAAM,OAAO,OAAO,OAAO,KAAK,MAAM,CAAC;AACvC,UAAM,iBAAiB,KAAK;AAE5B,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;AACjD,YAAM,QAAQ,eAAe,CAAC;AAC9B,YAAM,OAAO,eAAe,IAAI,CAAC;AACjC,YAAM,QAAQ,eAAe,IAAI,CAAC;AAClC,YAAM,MAAM,eAAe,IAAI,CAAC;AAChC,qBAAe,CAAC,IAAI;AACpB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAK,KAAa,gBAAgB,QAAQ;AAAA,IAChE;AACA,UAAM,SAASD,OAAM,gBAAgB;AAAA,MACnC,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC,EAAE,YAAY,CAAC;AAChB,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,WAAW,MAAM,SAAS,gBAAgB,MAAM,SAAS,cAAc;AACrE,UAAM,OAAO,MAAM,WAAW;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,UAAM,SAASA,OAAM,KAAK,MAAM;AAAA,MAC9B,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC;AACD,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,OAAO;AACL,UAAM,SAASA,OAAM,MAAM;AAC3B,WAAO,EAAE,MAAM,KAAK,MAAM,KAAK,OAAO;AAAA,EACxC;AACF;;;ACnFA,OAAOE,YAAW;;;ACAlB,OAAOC,YAAW;;;ALWlB,YAAYC,QAAO;AACnB,IAAM,oBAAsB,UAAO;AAAA,EACjC,SAAW;AAAA,IACP;AAAA,MACE,SAAM;AAAA,QACJ;AAAA,UACE,UAAO;AAAA,UACP,aAAU,CAAC,UAAU;AAAA,YACrB,KAAK;AAAA,YACL,MAAM;AAAA,YACN,OAAO;AAAA,YACP,QAAQ;AAAA,UACV,EAAE;AAAA,QACJ;AAAA,QACE,UAAO;AAAA,UACP,MAAQ,UAAO;AAAA,UACf,OAAS,UAAO;AAAA,UAChB,KAAO,UAAO;AAAA,UACd,QAAU,UAAO;AAAA,QACnB,CAAC;AAAA,MACH,CAAC;AAAA,MACD,EAAE,KAAK,IAAI,OAAO,IA
AI,MAAM,IAAI,QAAQ,GAAG;AAAA,IAC7C;AAAA,EACF;AAAA,EACA,YAAc,YAAW,SAAM,CAAG,QAAO,UAAO,CAAC,CAAC,CAAC,GAAG,IAAI;AAAA;AAE5D,CAAC;AAED,gBAAgB;AAAA,EACd;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AACjB,CAAC;AAED,eAAe,QAEb,OACA,UAA2B,CAAC,GAC5B;AACA,QAAM,iBAAmB,SAAM,mBAAmB,OAAO;AAGzD,MAAI,EAAE,IAAI,IAAI,MAAM,aAAa,KAAK;AACtC,QAAM,WAAW,MAAM,IAAI,SAAS;AACpC,QAAM,UAAU,KAAK,IAAI,SAAS,OAAQ,SAAS,MAAO;AAC1D,MAAI,UAAU,eAAe,YAAY;AACvC,QAAI,QAAQ,SAAS,QAAS,SAAS;AACvC,YAAQ,QAAQ,IAAI,IAAI,QAAQ;AAChC,UAAM,IAAI,OAAO;AAAA,MACf,OAAO,KAAK;AAAA,QACV,YAAY,SAAS,QACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,MACA,QAAQ,KAAK;AAAA,QACX,YAAY,SAAS,SACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,IACF,CAAC;AAAA,EACH;AACA,QAAM,IAAI,OAAO,EAAE,GAAG,eAAe,SAAS,YAAY,OAAO,CAAC;AAClE,QAAM,IAAI,YAAY,CAAC;AACvB,SAAO,KAAK,OAAO,GAAU;AAC/B;AACO,IAAM,MAAN,cAAkB,QAAQ;AAAA,EAC/B,aAAsB,OAAO,UAA8B,CAAC,GAAG;AAC7D,UAAM,MAAM,MAAM,QAAQ,OAAO,OAAO;AACxC,QAAI,QAAQ,gBAAgB;AAC1B,YAAMC,IAAG,MAAMC,MAAK,UAAU,QAAQ,cAAc,GAAG;AAAA,QACrD,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,IAAC,IAAY,UAAU,QAAQ,KAAK,GAAG;AACvC,WAAO;AAAA,EACT;AACF;;;AMzFO,IAAM,cAAc;AAAA,EACzB;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,
KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AACF;",
6
+ "names": ["path", "sharp", "filePath", "fs", "path", "fs", "path", "sharp", "filePath", "sharp", "sharp", "v", "fs", "path"]
7
+ }
package/package.json ADDED
@@ -0,0 +1,105 @@
1
+ {
2
+ "name": "@shenghuabi/knowledge",
3
+ "version": "1.0.21",
4
+ "description": "知识库",
5
+ "author": "wszgrcy",
6
+ "sideEffects": false,
7
+ "peerDependencies": {
8
+ "@cyia/vfs2": "^1.1.26",
9
+ "handlebars": "^4.7.8",
10
+ "lru-cache": "^11.2.1",
11
+ "rfdc": "^1.4.1",
12
+ "rxjs": "^7.8.2",
13
+ "static-injector": "^6.1.2",
14
+ "yaml": "^2.7.0",
15
+ "valibot": "^1.1.0",
16
+ "uuid": ">=11.1.0",
17
+ "magic-string": "^0.30.18",
18
+ "lodash-es": "^4.17.21",
19
+ "html-to-text": "^9.0.5",
20
+ "fastq": "^1.19.1",
21
+ "sharp": "0.34.2",
22
+ "@cyia/dl": "^1.0.15",
23
+ "@cyia/external-call": "^1.0.35"
24
+ },
25
+ "dependencies": {
26
+ "html-entities": "^2.6.0",
27
+ "@qdrant/qdrant-js": "1.15.1",
28
+ "@cyia/mdict-reader": "^1.0.9",
29
+ "@langchain/community": "^1.1.1",
30
+ "@langchain/core": "1.1.8",
31
+ "@langchain/textsplitters": "^1.0.1",
32
+ "@xhmikosr/decompress-tarbz2": "^8.0.2",
33
+ "chardet": "^2.1.0",
34
+ "d3-dsv": "^2.0.0",
35
+ "decompress": "^4.2.1",
36
+ "domutils": "^3.2.2",
37
+ "epub2": "^3.0.2",
38
+ "file-type": "^21.0.0",
39
+ "graphology": "^0.26.0",
40
+ "graphology-types": "^0.24.8",
41
+ "iconv-lite": "^0.7.0",
42
+ "mammoth": "^1.9.0",
43
+ "officeparser": "^4.2.0",
44
+ "pdf-parse": "^1.1.1",
45
+ "srt-parser-2": "^1.2.3",
46
+ "heic-decode": "^2.1.0",
47
+ "@gutenye/ocr-common": "^1.4.8",
48
+ "bmp-js": "^0.1.0",
49
+ "onnxruntime-node": "1.20.1",
50
+ "@huggingface/transformers": "3.7.2",
51
+ "xlsx": "^0.18.5",
52
+ "pdfjs-dist": "^5.4.449"
53
+ },
54
+ "overrides": {
55
+ "@huggingface/transformers": {
56
+ "onnxruntime-node": {
57
+ ".": "1.20.1"
58
+ }
59
+ }
60
+ },
61
+ "exports": {
62
+ "./embedding": {
63
+ "types": "./embedding/index.d.ts",
64
+ "import": "./embedding.mjs"
65
+ },
66
+ "./file-parser": {
67
+ "types": "./file-parser/index.d.ts",
68
+ "import": "./file-parser.mjs"
69
+ },
70
+ "./knowledge": {
71
+ "types": "./knowledge/index.d.ts",
72
+ "import": "./knowledge.mjs"
73
+ },
74
+ "./qdrant": {
75
+ "types": "./qdrant/index.d.ts",
76
+ "import": "./qdrant.mjs"
77
+ },
78
+ "./util": {
79
+ "types": "./util/index.d.ts",
80
+ "import": "./util.mjs"
81
+ },
82
+ "./image": {
83
+ "types": "./image/index.d.ts",
84
+ "import": "./image.mjs"
85
+ },
86
+ "./ocr": {
87
+ "types": "./ocr/index.d.ts",
88
+ "import": "./ocr.mjs"
89
+ },
90
+ "./worker/ocr": {
91
+ "types": "./worker/ocr/index.d.ts",
92
+ "import": "./worker/ocr.mjs"
93
+ },
94
+ "./worker/text2vec": {
95
+ "types": "./worker/text2vec/index.d.ts",
96
+ "import": "./worker/text2vec.mjs"
97
+ }
98
+ },
99
+ "publishConfig": {
100
+ "access": "public"
101
+ },
102
+ "repository": {
103
+ "url": "https://github.com/wszgrcy/shb-knowledge"
104
+ }
105
+ }
@@ -0,0 +1,3 @@
1
+ export * from './qdrant-client.service';
2
+ export * from './type';
3
+ export * from './qdrant-server.service';