@shenghuabi/knowledge 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/embedding/embedding.service.d.ts +4 -0
- package/embedding/index.d.ts +2 -0
- package/embedding/type.d.ts +12 -0
- package/embedding.mjs +61 -0
- package/embedding.mjs.map +7 -0
- package/file-parser/const.d.ts +20 -0
- package/file-parser/dict/dict-format/dsl/dsl-parse.service.d.ts +6 -0
- package/file-parser/dict/dict-format/dsl/dsl.format.d.ts +1 -0
- package/file-parser/dict/dict-format/mdict-parse.service.d.ts +20 -0
- package/file-parser/dict/dict-format/stardict-parse.service.d.ts +12 -0
- package/file-parser/dict/dict-format/yaml-parse.service.d.ts +25 -0
- package/file-parser/dict/dict.service.d.ts +10 -0
- package/file-parser/dict/index.d.ts +2 -0
- package/file-parser/dict/type.d.ts +24 -0
- package/file-parser/document-file-parser.service.d.ts +15 -0
- package/file-parser/document-loader/pdf-img.loader.d.ts +8 -0
- package/file-parser/document-loader/xlsx.loader.d.ts +6 -0
- package/file-parser/file-parser.service.d.ts +13 -0
- package/file-parser/index.d.ts +6 -0
- package/file-parser/text-analyse.d.ts +1 -0
- package/file-parser/text-parser.d.ts +3 -0
- package/file-parser/vl-parser/markdown.parser.d.ts +8 -0
- package/file-parser.mjs +850 -0
- package/file-parser.mjs.map +7 -0
- package/image/convert.d.ts +25 -0
- package/image/extract.d.ts +2 -0
- package/image/image-metadata.d.ts +2 -0
- package/image/index.d.ts +3 -0
- package/image.mjs +134 -0
- package/image.mjs.map +7 -0
- package/knowledge/article/article.knowledge.service.d.ts +53 -0
- package/knowledge/article/define/config.d.ts +60 -0
- package/knowledge/article/define/index.d.ts +2 -0
- package/knowledge/article/define/payload.d.ts +16 -0
- package/knowledge/article/index.d.ts +2 -0
- package/knowledge/common/common.knowledge.service.d.ts +240 -0
- package/knowledge/common/define/base.d.ts +7 -0
- package/knowledge/common/define/chunk.d.ts +14 -0
- package/knowledge/common/define/embedding.d.ts +5 -0
- package/knowledge/common/define/index.d.ts +3 -0
- package/knowledge/common/index.d.ts +1 -0
- package/knowledge/common/query.d.ts +7 -0
- package/knowledge/const.d.ts +95 -0
- package/knowledge/define/index.d.ts +245 -0
- package/knowledge/dict/define/config.d.ts +68 -0
- package/knowledge/dict/define/index.d.ts +1 -0
- package/knowledge/dict/dict.knowledge.service.d.ts +67 -0
- package/knowledge/graph/const.d.ts +20 -0
- package/knowledge/graph/define/config.d.ts +169 -0
- package/knowledge/graph/define/define.d.ts +402 -0
- package/knowledge/graph/define/index.d.ts +2 -0
- package/knowledge/graph/define/query.d.ts +14 -0
- package/knowledge/graph/graph.handle.service.d.ts +28 -0
- package/knowledge/graph/graph.knowledge.service.d.ts +40 -0
- package/knowledge/graph/graph.local.service.d.ts +85 -0
- package/knowledge/graph/graph.query.service.d.ts +160 -0
- package/knowledge/graph/graph.service.d.ts +24 -0
- package/knowledge/graph/graph.util.service.d.ts +31 -0
- package/knowledge/graph/type.d.ts +11 -0
- package/knowledge/graph/util/format-attr.d.ts +48 -0
- package/knowledge/graph/util/graph-util.d.ts +5 -0
- package/knowledge/graph/util.d.ts +1 -0
- package/knowledge/graph/vecotr-format.d.ts +11 -0
- package/knowledge/index.d.ts +17 -0
- package/knowledge/knowledge.manager.service.d.ts +42 -0
- package/knowledge/knowledge.util.service.d.ts +21 -0
- package/knowledge/normal/define/config.d.ts +60 -0
- package/knowledge/normal/define/index.d.ts +1 -0
- package/knowledge/normal/normal.knowledge.service.d.ts +49 -0
- package/knowledge/template.format.d.ts +6 -0
- package/knowledge/type.d.ts +28 -0
- package/knowledge.mjs +2856 -0
- package/knowledge.mjs.map +7 -0
- package/ocr/FileUtils.d.ts +4 -0
- package/ocr/ImageRaw.d.ts +11 -0
- package/ocr/index.d.ts +2 -0
- package/ocr/model-config.d.ts +8 -0
- package/ocr/ocr.d.ts +29 -0
- package/ocr.mjs +351 -0
- package/ocr.mjs.map +7 -0
- package/package.json +105 -0
- package/qdrant/index.d.ts +3 -0
- package/qdrant/qdrant-client.service.d.ts +396 -0
- package/qdrant/qdrant-server.service.d.ts +21 -0
- package/qdrant/type.d.ts +18 -0
- package/qdrant/util.d.ts +1 -0
- package/qdrant.mjs +274 -0
- package/qdrant.mjs.map +7 -0
- package/util/batch-queue.d.ts +6 -0
- package/util/cache-queue.d.ts +10 -0
- package/util/clone.d.ts +1 -0
- package/util/embedding-queue.d.ts +3 -0
- package/util/get-hash.d.ts +2 -0
- package/util/html-to-text/index.d.ts +5 -0
- package/util/index.d.ts +10 -0
- package/util/is-truthy.d.ts +1 -0
- package/util/log.service.d.ts +6 -0
- package/util/promise.d.ts +5 -0
- package/util/type.d.ts +1 -0
- package/util/uniq-object-key.d.ts +1 -0
- package/util.mjs +219 -0
- package/util.mjs.map +7 -0
- package/worker/custom-cache.d.ts +28 -0
- package/worker/ocr/index.d.ts +17 -0
- package/worker/ocr.mjs +75 -0
- package/worker/ocr.mjs.map +7 -0
- package/worker/reranker.mjs +180 -0
- package/worker/reranker.mjs.map +7 -0
- package/worker/set-transformers-config.d.ts +19 -0
- package/worker/text2vec/index.d.ts +9 -0
- package/worker/text2vec.mjs +194 -0
- package/worker/text2vec.mjs.map +7 -0
package/ocr.mjs
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
// packages/ocr/ocr.ts
|
|
2
|
+
import BaseOcr, {
|
|
3
|
+
registerBackend
|
|
4
|
+
} from "@gutenye/ocr-common";
|
|
5
|
+
import { splitIntoLineImages } from "@gutenye/ocr-common/splitIntoLineImages";
|
|
6
|
+
|
|
7
|
+
// packages/ocr/ImageRaw.ts
|
|
8
|
+
import filePath from "node:path";
|
|
9
|
+
import { ImageRawBase } from "@gutenye/ocr-common";
|
|
10
|
+
import sharp from "sharp";
|
|
11
|
+
/**
 * sharp-backed image wrapper registered as the `ImageRaw` backend for
 * `@gutenye/ocr-common` (see the `registerBackend` call in this file).
 *
 * The instance keeps a sharp pipeline next to the raw pixel fields
 * (`data`, `width`, `height`); every transforming method replaces the
 * pipeline and then refreshes those fields from its output.
 */
var ImageRaw = class _ImageRaw extends ImageRawBase {
  /** sharp pipeline that `write`, `resize` and `drawBox` operate on. */
  #sharp;

  /**
   * Create an ImageRaw from a file path or from an existing sharp pipeline.
   *
   * @param {string|object} path4 - Either an image file path, or an object
   *   exposing sharp's `raw().toBuffer()` chain. `convert()` in this file
   *   passes a sharp pipeline to `detect()`; presumably that value ends up
   *   here — confirm against @gutenye/ocr-common.
   * @returns {Promise<ImageRaw>} a new instance holding the decoded pixels.
   */
  static async open(path4) {
    // A string has no `.raw()` method, so the original code threw a TypeError
    // for real file paths. Wrap it in a sharp pipeline first; ensureAlpha()
    // provides the 4 channels that toSharp() declares for the raw buffer.
    const pipeline = typeof path4 === "string" ? sharp(path4).ensureAlpha() : path4;
    return new _ImageRaw(await toImageRaw(pipeline));
  }

  /**
   * @param {{data: *, width: number, height: number}} imageRawData - raw
   *   pixel buffer plus dimensions; forwarded to the base class and used to
   *   build the internal sharp pipeline (interpreted as 4-channel data).
   */
  constructor(imageRawData) {
    super(imageRawData);
    this.#sharp = toSharp(imageRawData);
  }

  /**
   * Encode the current pipeline and write it to disk.
   *
   * @param {string} path4 - destination path; its extension (without the
   *   leading dot) is used as the sharp output format.
   * @returns {Promise<*>} whatever sharp's `toFile()` resolves with.
   */
  async write(path4) {
    const ext = filePath.extname(path4).slice(1);
    return this.#sharp.toFormat(ext).toFile(path4);
  }

  /**
   * Resize the image with `fit: "contain"`.
   *
   * @param {{width?: number, height?: number}} size - target dimensions.
   * @returns {Promise<ImageRaw>} this instance, with refreshed pixel fields.
   */
  async resize(size) {
    return this.#apply(
      this.#sharp.resize({
        width: size.width,
        height: size.height,
        fit: "contain"
      })
    );
  }

  /**
   * Overlay a red, unfilled polygon for each line image's `box`.
   *
   * @param {Array<{box: number[][]}>} lineImages - each `box` holds four
   *   `[x, y]` points.
   * @returns {Promise<ImageRaw>} this instance, with refreshed pixel fields.
   */
  async drawBox(lineImages) {
    const polygons = lineImages.map((lineImage) => {
      const [p1, p2, p3, p4] = lineImage.box;
      return `<polygon points="${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}" fill="none" stroke="red" />`;
    }).join("\n");
    // Joined with "\n" (including the leading and trailing empty entries) so
    // the markup matches the original template literal line for line.
    const svg = [
      "",
      `<svg width="${this.width}" height="${this.height}">`,
      polygons,
      "</svg>",
      ""
    ].join("\n");
    return this.#apply(
      this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }])
    );
  }

  /**
   * Adopt `sharp5` as the current pipeline and reload `data`, `width` and
   * `height` from its raw output.
   *
   * @param {object} sharp5 - sharp pipeline to adopt.
   * @returns {Promise<ImageRaw>} this instance.
   */
  async #apply(sharp5) {
    this.#sharp = sharp5;
    const result = await toImageRaw(sharp5);
    this.data = result.data;
    this.width = result.width;
    this.height = result.height;
    return this;
  }
};
|
|
55
|
+
/**
 * Run a sharp pipeline to completion and return its raw pixel output.
 *
 * @param {object} sharp5 - object exposing sharp's `raw().toBuffer()` chain.
 * @returns {Promise<{data: *, width: number, height: number}>} the output
 *   buffer together with the dimensions reported in sharp's `info`.
 */
async function toImageRaw(sharp5) {
  // resolveWithObject makes toBuffer() resolve with { data, info }.
  const { data, info } = await sharp5.raw().toBuffer({ resolveWithObject: true });
  const { width, height } = info;
  return { data, width, height };
}
|
|
63
|
+
/**
 * Wrap raw pixel data in a sharp pipeline.
 *
 * @param {{data: *, width: number, height: number}} imageRawData - raw
 *   buffer plus its dimensions.
 * @returns {object} sharp instance reading `data` as 4-channel raw input.
 */
function toSharp(imageRawData) {
  const { data, width, height } = imageRawData;
  // The channel count is fixed at 4 here, whatever the buffer really holds.
  const raw = { width, height, channels: 4 };
  return sharp(data, { raw });
}
|
|
72
|
+
|
|
73
|
+
// packages/ocr/FileUtils.ts
|
|
74
|
+
import fs from "node:fs/promises";
|
|
75
|
+
import { FileUtilsBase } from "@gutenye/ocr-common";
|
|
76
|
+
import { path } from "@cyia/vfs2";
|
|
77
|
+
/**
 * File-access backend registered with `@gutenye/ocr-common`.
 */
var FileUtils = class extends FileUtilsBase {
  /**
   * Read a file as UTF-8 text.
   *
   * @param {string} filePath2 - path to read; normalized with the
   *   `@cyia/vfs2` path helper before being handed to `fs.readFile`.
   * @returns {Promise<string>} the file contents.
   */
  static async read(filePath2) {
    const normalized = path.normalize(filePath2);
    const text = await fs.readFile(normalized, "utf8");
    return text;
  }
};
|
|
82
|
+
|
|
83
|
+
// packages/ocr/ocr.ts
|
|
84
|
+
import { InferenceSession } from "onnxruntime-node";
|
|
85
|
+
import fs3 from "fs/promises";
|
|
86
|
+
import { path as path3 } from "@cyia/vfs2";
|
|
87
|
+
|
|
88
|
+
// packages/image/convert.ts
|
|
89
|
+
import * as v from "valibot";
|
|
90
|
+
import * as fs2 from "fs/promises";
|
|
91
|
+
import { path as path2 } from "@cyia/vfs2";
|
|
92
|
+
import { fileTypeFromBuffer } from "file-type";
|
|
93
|
+
import sharp2 from "sharp";
|
|
94
|
+
import heicdecode from "heic-decode";
|
|
95
|
+
import { decode } from "bmp-js";
|
|
96
|
+
// Matches the header of a base64 image data URL, e.g. "data:image/png;base64,".
var BASE64_HEAD_REPLACE_REG = /^data:image\/[\w]+;base64,/;

// Accepted image inputs, tried in order:
//   1. a base64 image data URL  -> decoded into a Uint8Array
//   2. any other string         -> treated as a file path; yields a Promise
//                                  of the file's bytes
//   3. a Uint8Array             -> passed through as-is
var InputDefine = v.union([
  v.pipe(
    v.string(),
    v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),
    v.transform((base64) => {
      // The check above guarantees the header is present; strip it and
      // decode the remaining payload.
      const payload = base64.replace(BASE64_HEAD_REPLACE_REG, "");
      return new Uint8Array(Buffer.from(payload, "base64"));
    })
  ),
  v.pipe(
    v.string(),
    v.transform((filePath2) => {
      const pendingRead = fs2.readFile(path2.normalize(filePath2));
      return pendingRead.then((buffer) => new Uint8Array(buffer));
    })
  ),
  v.pipe(v.custom((input) => input instanceof Uint8Array))
]);
|
|
116
|
+
/**
 * Resolve an image input to its bytes by parsing it with `InputDefine`.
 *
 * @param {string|Uint8Array} input - base64 image data URL, file path, or
 *   raw bytes.
 * @returns {Promise<Uint8Array>} the image bytes. The file-path branch of
 *   `InputDefine` yields a Promise, which the `await` here unwraps.
 */
async function decodeToBuffer(input) {
  return await v.parse(InputDefine, input);
}
|
|
120
|
+
/**
 * Turn an image input into a sharp pipeline for OCR processing.
 *
 * BMP and HEIC/HEIF inputs are decoded here and given to sharp as raw
 * 4-channel pixels; every other detected type is handed to sharp directly.
 *
 * @param {string|Uint8Array} input - file path, base64 image data URL, or
 *   raw bytes (anything `decodeToBuffer` accepts).
 * @returns {Promise<{type: string, raw: object}>} `raw` is the sharp
 *   pipeline; `type` is "image/png" for the decoded BMP/HEIC/HEIF branches
 *   and the detected mime type otherwise.
 * @throws {Error} when the file type cannot be detected from the bytes.
 */
async function convertToRaw(input) {
  const buffer = await decodeToBuffer(input);
  const type = await fileTypeFromBuffer(buffer);
  if (!type) {
    throw new Error(`不支持的图片类型`);
  }
  switch (type.mime) {
    case "image/bmp": {
      const bitmap = decode(Buffer.from(buffer));
      const pixels = bitmap.data;
      // Reorder each 4-byte pixel in place from ABGR to RGBA.
      for (let offset = 0; offset < pixels.length; offset += 4) {
        const alpha = pixels[offset];
        const blue = pixels[offset + 1];
        const green = pixels[offset + 2];
        const red = pixels[offset + 3];
        pixels[offset] = red;
        pixels[offset + 1] = green;
        pixels[offset + 2] = blue;
        // Force the pixel opaque unless the decoder reports an alpha channel.
        pixels[offset + 3] = bitmap.is_with_alpha ? alpha : 255;
      }
      const pipeline = sharp2(pixels, {
        raw: {
          width: bitmap.width,
          height: bitmap.height,
          channels: 4
        }
      }).ensureAlpha(1);
      return { type: "image/png", raw: pipeline };
    }
    case "image/heic":
    case "image/heif": {
      const decoded = await heicdecode({ buffer });
      const pipeline = sharp2(decoded.data, {
        raw: {
          width: decoded.width,
          height: decoded.height,
          channels: 4
        }
      });
      return { type: "image/png", raw: pipeline };
    }
    default:
      // Any other detected type is passed to sharp as-is.
      return { type: type.mime, raw: sharp2(buffer) };
  }
}
|
|
164
|
+
|
|
165
|
+
// packages/image/extract.ts
|
|
166
|
+
import sharp4 from "sharp";
|
|
167
|
+
|
|
168
|
+
// packages/image/image-metadata.ts
|
|
169
|
+
import sharp3 from "sharp";
|
|
170
|
+
|
|
171
|
+
// packages/ocr/ocr.ts
|
|
172
|
+
import * as v2 from "valibot";
|
|
173
|
+
// Options accepted by `convert()`:
//   padding    - a single number applied to all four sides, or an explicit
//                { left, right, top, bottom } object; defaults to 50 per side.
//   maxSideLen - numeric limit that `convert()` compares against the longer
//                image side; defaults to 1920.
var ImageAdjustDefine = v2.object({
  padding: v2.pipe(
    v2.optional(
      v2.union([
        // Shorthand: one number expands to equal padding on every side.
        v2.pipe(
          v2.number(),
          v2.transform((size) => ({
            top: size,
            left: size,
            right: size,
            bottom: size
          }))
        ),
        v2.object({
          left: v2.number(),
          right: v2.number(),
          top: v2.number(),
          bottom: v2.number()
        })
      ]),
      { top: 50, right: 50, left: 50, bottom: 50 }
    )
  ),
  maxSideLen: v2.optional(v2.union([v2.pipe(v2.number())]), 1920)
  // NOTE(review): a `threshold` option (default 0.3) was left commented out
  // at this position in the original — confirm whether it is still planned.
});

// Register the Node implementations with @gutenye/ocr-common. No default
// models are supplied here.
registerBackend({
  FileUtils,
  ImageRaw,
  InferenceSession,
  splitIntoLineImages,
  defaultModels: undefined
});
|
|
206
|
+
/**
 * Prepare an image and run detection on it.
 *
 * Steps: decode the input into a sharp pipeline, shrink it when its longer
 * side exceeds `maxSideLen` (keeping the aspect ratio), add white padding,
 * make sure an alpha channel exists, then hand the pipeline to
 * `this.detect()`.
 *
 * Must be called with `this` set to an object providing `detect()`;
 * `Ocr.create` binds it to the OCR instance.
 *
 * @param {string|Uint8Array} input - file path, base64 image data URL, or
 *   raw image bytes.
 * @param {{padding?: number|{left: number, right: number, top: number, bottom: number}, maxSideLen?: number}} [options]
 *   validated with `ImageAdjustDefine` (defaults: 50 padding per side,
 *   `maxSideLen` 1920).
 * @returns {Promise<*>} whatever `this.detect()` resolves with.
 */
async function convert(input, options = {}) {
  const resolveOptions = v2.parse(ImageAdjustDefine, options);
  let { raw } = await convertToRaw(input);
  const metadata = await raw.metadata();
  const maxSize = Math.max(metadata.width, metadata.height);
  if (maxSize > resolveOptions.maxSideLen) {
    // ratio = shorter side / longer side, always in (0, 1].
    let ratio = metadata.width / metadata.height;
    ratio = ratio > 1 ? 1 / ratio : ratio;
    const longSide = Math.round(resolveOptions.maxSideLen);
    // For very elongated images the scaled shorter side can round down to 0,
    // which sharp rejects as a non-positive dimension; keep it at 1 pixel or
    // more. The longer side is intentionally not clamped, so an invalid
    // maxSideLen still surfaces as an error instead of being hidden.
    const shortSide = Math.max(
      1,
      Math.round(ratio * resolveOptions.maxSideLen)
    );
    raw = raw.resize({
      width: maxSize === metadata.width ? longSide : shortSide,
      height: maxSize === metadata.height ? longSide : shortSide
    });
  }
  raw = raw.extend({ ...resolveOptions.padding, background: "#fff" });
  raw = raw.ensureAlpha(1);
  return this.detect(raw);
}
|
|
227
|
+
/**
 * OCR entry point: a `BaseOcr` factory whose instances also carry a bound
 * `convert()` method.
 */
var Ocr = class extends BaseOcr {
  /**
   * Create an OCR instance through `BaseOcr.create` and attach `convert`.
   *
   * @param {object} [options] - forwarded unchanged to `BaseOcr.create`.
   *   When `options.debugOutputDir` is set, that directory is created
   *   (recursively) after the instance has been built.
   * @returns {Promise<object>} the object returned by `BaseOcr.create`,
   *   extended with a `convert` method bound to it.
   */
  static async create(options = {}) {
    const instance = await BaseOcr.create(options);
    const { debugOutputDir } = options;
    if (debugOutputDir) {
      await fs3.mkdir(path3.normalize(debugOutputDir), { recursive: true });
    }
    instance.convert = convert.bind(instance);
    return instance;
  }
};
|
|
239
|
+
|
|
240
|
+
// packages/ocr/model-config.ts
|
|
241
|
+
// Selectable OCR model sets. Each table row is
//   [label, key, detection model name, recognition model name, dictionary file]
// and expands to relative paths of the form
//   det/<det>.onnx, rec/<rec>.onnx, rec/<rec>/<dictionary file>.
// Every entry uses the same classifier model.
var ModelConfig = [
  ["简体中文", "ch_mobile", "ch_PP-OCRv4_det_infer", "ch_PP-OCRv4_rec_infer", "ppocr_keys_v1.txt"],
  ["简体中文(服务器)", "ch_server", "ch_PP-OCRv4_det_server_infer", "ch_PP-OCRv4_rec_server_infer", "ppocr_keys_v1.txt"],
  ["繁體中文", "chinese_cht", "ch_PP-OCRv4_det_infer", "chinese_cht_PP-OCRv3_rec_infer", "chinese_cht_dict.txt"],
  ["英文", "en_mobile", "en_PP-OCRv3_det_infer", "en_PP-OCRv4_rec_infer", "en_dict.txt"],
  ["阿拉伯文", "ar_mobile", "Multilingual_PP-OCRv3_det_infer", "arabic_PP-OCRv4_rec_infer", "arabic_dict.txt"],
  ["塞尔维亚文", "cyrillic_mobile", "Multilingual_PP-OCRv3_det_infer", "cyrillic_PP-OCRv3_rec_infer", "cyrillic_dict.txt"],
  ["梵文", "devanagari_mobile", "Multilingual_PP-OCRv3_det_infer", "devanagari_PP-OCRv4_rec_infer", "devanagari_dict.txt"],
  ["日文", "japan_mobile", "Multilingual_PP-OCRv3_det_infer", "japan_PP-OCRv4_rec_infer", "japan_dict.txt"],
  ["卡纳达语", "ka_mobile", "Multilingual_PP-OCRv3_det_infer", "ka_PP-OCRv4_rec_infer", "ka_dict.txt"],
  ["韩文", "korean_mobile", "Multilingual_PP-OCRv3_det_infer", "korean_PP-OCRv4_rec_infer", "korean_dict.txt"],
  ["拉丁文", "latin_mobile", "Multilingual_PP-OCRv3_det_infer", "latin_PP-OCRv3_rec_infer", "latin_dict.txt"],
  ["泰米尔文", "ta_mobile", "Multilingual_PP-OCRv3_det_infer", "ta_PP-OCRv4_rec_infer", "ta_dict.txt"],
  ["泰卢固文", "te_mobile", "Multilingual_PP-OCRv3_det_infer", "te_PP-OCRv4_rec_infer", "te_dict.txt"]
].map(([label, key, detName, recName, dictFile]) => ({
  label,
  key,
  det: `det/${detName}.onnx`,
  rec: `rec/${recName}.onnx`,
  dict: `rec/${recName}/${dictFile}`,
  cls: "cls/ch_ppocr_mobile_v2.0_cls_infer.onnx"
}));
|
|
347
|
+
export {
|
|
348
|
+
ModelConfig,
|
|
349
|
+
Ocr
|
|
350
|
+
};
|
|
351
|
+
//# sourceMappingURL=ocr.mjs.map
|
package/ocr.mjs.map
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 3,
|
|
3
|
+
"sources": ["../packages/ocr/ocr.ts", "../packages/ocr/ImageRaw.ts", "../packages/ocr/FileUtils.ts", "../packages/image/convert.ts", "../packages/image/extract.ts", "../packages/image/image-metadata.ts", "../packages/ocr/model-config.ts"],
|
|
4
|
+
"sourcesContent": ["import BaseOcr, {\n ModelCreateOptions,\n registerBackend,\n} from '@gutenye/ocr-common';\nimport { splitIntoLineImages } from '@gutenye/ocr-common/splitIntoLineImages';\nimport { ImageRaw } from './ImageRaw';\nimport { FileUtils } from './FileUtils';\nimport { InferenceSession } from 'onnxruntime-node';\nimport fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { convertToRaw } from '../image';\nimport * as v from 'valibot';\nconst ImageAdjustDefine = v.object({\n padding: v.pipe(\n v.optional(\n v.union([\n v.pipe(\n v.number(),\n v.transform((item) => ({\n top: item,\n left: item,\n right: item,\n bottom: item,\n })),\n ),\n v.object({\n left: v.number(),\n right: v.number(),\n top: v.number(),\n bottom: v.number(),\n }),\n ]),\n { top: 50, right: 50, left: 50, bottom: 50 },\n ),\n ),\n maxSideLen: v.optional(v.union([v.pipe(v.number())]), 1920),\n // threshold: v.optional(v.union([v.pipe(v.number())]), 0.3),\n});\nexport type ImageAdjustType = v.InferInput<typeof ImageAdjustDefine>;\nregisterBackend({\n FileUtils,\n ImageRaw,\n InferenceSession,\n splitIntoLineImages,\n defaultModels: undefined,\n});\n\nasync function convert(\n this: BaseOcr,\n input: string | Uint8Array,\n options: ImageAdjustType = {},\n) {\n const resolveOptions = v.parse(ImageAdjustDefine, options);\n //100 80 0.8\n // 50 40\n let { raw } = await convertToRaw(input);\n const metadata = await raw.metadata();\n const maxSize = Math.max(metadata.width!, metadata.height!);\n if (maxSize > resolveOptions.maxSideLen) {\n let ratio = metadata.width! / metadata.height!;\n ratio = ratio > 1 ? 1 / ratio : ratio;\n raw = raw.resize({\n width: Math.round(\n maxSize === metadata.width!\n ? resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n height: Math.round(\n maxSize === metadata.height!\n ? 
resolveOptions.maxSideLen\n : ratio * resolveOptions.maxSideLen,\n ),\n });\n }\n raw = raw.extend({ ...resolveOptions.padding, background: '#fff' });\n raw = raw.ensureAlpha(1);\n return this.detect(raw as any);\n}\nexport class Ocr extends BaseOcr {\n static override async create(options: ModelCreateOptions = {}) {\n const ocr = await BaseOcr.create(options);\n if (options.debugOutputDir) {\n await fs.mkdir(path.normalize(options.debugOutputDir), {\n recursive: true,\n });\n }\n (ocr as any).convert = convert.bind(ocr);\n return ocr as BaseOcr & { convert: typeof convert };\n }\n}\n", "import filePath from 'node:path';\nimport { ImageRawBase } from '@gutenye/ocr-common';\nimport type { ImageRawData, LineImage, SizeOption } from '@gutenye/ocr-common';\nimport sharp from 'sharp';\nexport class ImageRaw extends ImageRawBase {\n #sharp!: sharp.Sharp;\n\n static async open(path: string): Promise<ImageRaw> {\n // let { raw } = await convertToRaw(path);\n return new ImageRaw(await toImageRaw(path as any));\n }\n\n constructor(imageRawData: ImageRawData) {\n super(imageRawData);\n this.#sharp = toSharp(imageRawData);\n }\n\n async write(path: string) {\n const ext = filePath.extname(path).slice(1);\n return this.#sharp.toFormat(ext as keyof sharp.FormatEnum).toFile(path);\n }\n\n async resize(size: SizeOption) {\n return this.#apply(\n this.#sharp.resize({\n width: size.width,\n height: size.height,\n fit: 'contain',\n }),\n );\n }\n\n async drawBox(lineImages: LineImage[]) {\n const svg = `\n <svg width=\"${this.width}\" height=\"${this.height}\">\n ${lineImages\n .map((lineImage) => {\n const [p1, p2, p3, p4] = lineImage.box;\n return `<polygon points=\"${p1[0]},${p1[1]} ${p2[0]},${p2[1]} ${p3[0]},${p3[1]} ${p4[0]},${p4[1]}\" fill=\"none\" stroke=\"red\" />`;\n })\n .join('\\n')}\n </svg>\n `;\n return this.#apply(\n this.#sharp.composite([{ input: Buffer.from(svg), left: 0, top: 0 }]),\n );\n }\n\n async #apply(sharp: sharp.Sharp) {\n this.#sharp = sharp;\n const 
result = await toImageRaw(sharp);\n this.data = result.data;\n this.width = result.width;\n this.height = result.height;\n return this;\n }\n}\n\nasync function toImageRaw(sharp: sharp.Sharp) {\n const result = await sharp.raw().toBuffer({ resolveWithObject: true });\n return {\n data: result.data,\n width: result.info.width,\n height: result.info.height,\n };\n}\nfunction toSharp(imageRawData: ImageRawData) {\n return sharp(imageRawData.data, {\n raw: {\n width: imageRawData.width,\n height: imageRawData.height,\n channels: 4,\n },\n });\n}\n", "import fs from 'node:fs/promises';\nimport { FileUtilsBase } from '@gutenye/ocr-common';\nimport { path } from '@cyia/vfs2';\nexport class FileUtils extends FileUtilsBase {\n static override async read(filePath: string) {\n return await fs.readFile(path.normalize(filePath), 'utf8');\n }\n}\n", "import * as v from 'valibot';\nimport * as fs from 'fs/promises';\nimport { path } from '@cyia/vfs2';\nimport { fileTypeFromBuffer } from 'file-type';\nimport sharp from 'sharp';\nimport heicdecode from 'heic-decode';\nimport { decode } from 'bmp-js';\n\nconst BASE64_HEAD_REPLACE_REG = /^data:image\\/[\\w]+;base64,/;\n\nconst InputDefine = v.union([\n v.pipe(\n v.string(),\n v.check((input) => BASE64_HEAD_REPLACE_REG.test(input)),\n v.transform((base64) => {\n const result = base64.match(BASE64_HEAD_REPLACE_REG)!;\n return new Uint8Array(\n Buffer.from(base64.slice(result[0].length), 'base64'),\n );\n }),\n ),\n v.pipe(\n v.string(),\n v.transform((filePath) =>\n fs\n .readFile(path.normalize(filePath))\n .then((buffer) => new Uint8Array(buffer)),\n ),\n ),\n v.pipe(v.custom<Uint8Array>((input) => input instanceof Uint8Array)),\n]);\nexport async function decodeToBuffer(input: string | Uint8Array) {\n const buffer = await v.parse(InputDefine, input);\n return buffer;\n}\n/**\n * ocr处理用\n * 支持路径,base64,uint8array\n */\nexport async function convertToRaw(input: string | Uint8Array) {\n const buffer = await decodeToBuffer(input);\n 
const type = await fileTypeFromBuffer(buffer);\n if (!type) {\n throw new Error(`不支持的图片类型`);\n }\n if (type.mime === 'image/bmp') {\n const data = decode(Buffer.from(buffer));\n const resolvedBuffer = data.data;\n //ABGR =>RGBA\n for (let i = 0; i < resolvedBuffer.length; i += 4) {\n const alpha = resolvedBuffer[i];\n const blue = resolvedBuffer[i + 1];\n const green = resolvedBuffer[i + 2];\n const red = resolvedBuffer[i + 3];\n resolvedBuffer[i] = red;\n resolvedBuffer[i + 1] = green;\n resolvedBuffer[i + 2] = blue;\n resolvedBuffer[i + 3] = (data as any).is_with_alpha ? alpha : 0xff;\n }\n const result = sharp(resolvedBuffer, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n }).ensureAlpha(1);\n return { type: 'image/png', raw: result };\n } else if (type?.mime === 'image/heic' || type?.mime === 'image/heif') {\n const data = await heicdecode({\n buffer: buffer as any,\n });\n const result = sharp(data.data, {\n raw: {\n width: data.width,\n height: data.height,\n channels: 4,\n },\n });\n return { type: 'image/png', raw: result };\n } else {\n const result = sharp(buffer);\n return { type: type.mime, raw: result };\n }\n}\n// todo 未来其实应该直接是Buffer转通道颜色\n/**\n * 转换为兼容的图片格式\n */\nexport async function convertToCompatibleBuffer(input: string | Uint8Array) {\n const result2 = await convertToRaw(input);\n\n return {\n type: result2.type,\n buffer: new Uint8Array(await result2.raw.png().toBuffer()),\n };\n}\n\nexport function bufferToImageBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return `data:${input.type};base64,${Buffer.from(input.buffer).toString('base64')}`;\n}\nexport function bufferToFileBase64(input: {\n type: string;\n buffer: Uint8Array;\n}) {\n return Buffer.from(input.buffer).toString('base64');\n}\n", "import sharp from 'sharp';\r\nimport { getImageMetadata } from './image-metadata';\r\nexport async function imageExtract(\r\n buffer: Buffer,\r\n position: sharp.Region,\r\n padding: number = 0,\r\n) {\r\n let 
metadata =await getImageMetadata(buffer);\r\n let left = Math.min(\r\n Math.max(Math.round(position.left - padding), 0),\r\n metadata.width,\r\n );\r\n let top = Math.min(\r\n Math.max(Math.round(position.top - padding), 0),\r\n metadata.height,\r\n );\r\n return sharp(buffer)\r\n .extract({\r\n left,\r\n top,\r\n width: Math.min(\r\n Math.max(Math.round(position.width + padding * 2), 0),\r\n metadata.width - left,\r\n ),\r\n height: Math.min(\r\n Math.max(Math.round(position.height + padding * 2), 0),\r\n metadata.height - top,\r\n ),\r\n })\r\n .toBuffer();\r\n}\r\n", "import sharp from 'sharp';\r\n\r\nexport function getImageMetadata(buffer: Buffer) {\r\n let instance = sharp(buffer);\r\n return instance.metadata();\r\n}\r\n", "export const ModelConfig = [\n {\n label: '简体中文',\n key: 'ch_mobile',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '简体中文(服务器)',\n key: 'ch_server',\n det: 'det/ch_PP-OCRv4_det_server_infer.onnx',\n rec: 'rec/ch_PP-OCRv4_rec_server_infer.onnx',\n dict: 'rec/ch_PP-OCRv4_rec_server_infer/ppocr_keys_v1.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '繁體中文',\n key: 'chinese_cht',\n det: 'det/ch_PP-OCRv4_det_infer.onnx',\n rec: 'rec/chinese_cht_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '英文',\n key: 'en_mobile',\n det: 'det/en_PP-OCRv3_det_infer.onnx',\n rec: 'rec/en_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/en_PP-OCRv4_rec_infer/en_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '阿拉伯文',\n key: 'ar_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/arabic_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n 
label: '塞尔维亚文',\n key: 'cyrillic_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/cyrillic_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '梵文',\n key: 'devanagari_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/devanagari_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '日文',\n key: 'japan_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/japan_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/japan_PP-OCRv4_rec_infer/japan_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '卡纳达语',\n key: 'ka_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ka_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ka_PP-OCRv4_rec_infer/ka_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '韩文',\n key: 'korean_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/korean_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/korean_PP-OCRv4_rec_infer/korean_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '拉丁文',\n key: 'latin_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/latin_PP-OCRv3_rec_infer.onnx',\n dict: 'rec/latin_PP-OCRv3_rec_infer/latin_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰米尔文',\n key: 'ta_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/ta_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/ta_PP-OCRv4_rec_infer/ta_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n {\n label: '泰卢固文',\n key: 'te_mobile',\n det: 'det/Multilingual_PP-OCRv3_det_infer.onnx',\n rec: 'rec/te_PP-OCRv4_rec_infer.onnx',\n dict: 'rec/te_PP-OCRv4_rec_infer/te_dict.txt',\n cls: 'cls/ch_ppocr_mobile_v2.0_cls_infer.onnx',\n },\n];\n"],
|
|
5
|
+
"mappings": ";AAAA,OAAO;AAAA,EAEL;AAAA,OACK;AACP,SAAS,2BAA2B;;;ACJpC,OAAO,cAAc;AACrB,SAAS,oBAAoB;AAE7B,OAAO,WAAW;AACX,IAAM,WAAN,MAAM,kBAAiB,aAAa;AAAA,EACzC;AAAA,EAEA,aAAa,KAAKA,OAAiC;AAEjD,WAAO,IAAI,UAAS,MAAM,WAAWA,KAAW,CAAC;AAAA,EACnD;AAAA,EAEA,YAAY,cAA4B;AACtC,UAAM,YAAY;AAClB,SAAK,SAAS,QAAQ,YAAY;AAAA,EACpC;AAAA,EAEA,MAAM,MAAMA,OAAc;AACxB,UAAM,MAAM,SAAS,QAAQA,KAAI,EAAE,MAAM,CAAC;AAC1C,WAAO,KAAK,OAAO,SAAS,GAA6B,EAAE,OAAOA,KAAI;AAAA,EACxE;AAAA,EAEA,MAAM,OAAO,MAAkB;AAC7B,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,OAAO;AAAA,QACjB,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,KAAK;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,QAAQ,YAAyB;AACrC,UAAM,MAAM;AAAA,oBACI,KAAK,KAAK,aAAa,KAAK,MAAM;AAAA,UAC5C,WACC,IAAI,CAAC,cAAc;AAClB,YAAM,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,UAAU;AACnC,aAAO,oBAAoB,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;AAAA,IACjG,CAAC,EACA,KAAK,IAAI,CAAC;AAAA;AAAA;AAGjB,WAAO,KAAK;AAAA,MACV,KAAK,OAAO,UAAU,CAAC,EAAE,OAAO,OAAO,KAAK,GAAG,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,OAAOC,QAAoB;AAC/B,SAAK,SAASA;AACd,UAAM,SAAS,MAAM,WAAWA,MAAK;AACrC,SAAK,OAAO,OAAO;AACnB,SAAK,QAAQ,OAAO;AACpB,SAAK,SAAS,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,eAAe,WAAWA,QAAoB;AAC5C,QAAM,SAAS,MAAMA,OAAM,IAAI,EAAE,SAAS,EAAE,mBAAmB,KAAK,CAAC;AACrE,SAAO;AAAA,IACL,MAAM,OAAO;AAAA,IACb,OAAO,OAAO,KAAK;AAAA,IACnB,QAAQ,OAAO,KAAK;AAAA,EACtB;AACF;AACA,SAAS,QAAQ,cAA4B;AAC3C,SAAO,MAAM,aAAa,MAAM;AAAA,IAC9B,KAAK;AAAA,MACH,OAAO,aAAa;AAAA,MACpB,QAAQ,aAAa;AAAA,MACrB,UAAU;AAAA,IACZ;AAAA,EACF,CAAC;AACH;;;AC1EA,OAAO,QAAQ;AACf,SAAS,qBAAqB;AAC9B,SAAS,YAAY;AACd,IAAM,YAAN,cAAwB,cAAc;AAAA,EAC3C,aAAsB,KAAKC,WAAkB;AAC3C,WAAO,MAAM,GAAG,SAAS,KAAK,UAAUA,SAAQ,GAAG,MAAM;AAAA,EAC3D;AACF;;;AFAA,SAAS,wBAAwB;AACjC,OAAOC,SAAQ;AACf,SAAS,QAAAC,aAAY;;;AGTrB,YAAY,OAAO;AACnB,YAAYC,SAAQ;AACpB,SAAS,QAAAC,aAAY;AACrB,SAAS,0BAA0B;AACnC,OAAOC,YAAW;AAClB,OAAO,gBAAgB;AACvB,SAAS,cAAc;AAEvB,IAAM,0BAA0B;AAEhC,IAAM,cAAgB,QAAM;AAAA
,EACxB;AAAA,IACE,SAAO;AAAA,IACP,QAAM,CAAC,UAAU,wBAAwB,KAAK,KAAK,CAAC;AAAA,IACpD,YAAU,CAAC,WAAW;AACtB,YAAM,SAAS,OAAO,MAAM,uBAAuB;AACnD,aAAO,IAAI;AAAA,QACT,OAAO,KAAK,OAAO,MAAM,OAAO,CAAC,EAAE,MAAM,GAAG,QAAQ;AAAA,MACtD;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACE;AAAA,IACE,SAAO;AAAA,IACP;AAAA,MAAU,CAACC,cAER,aAASF,MAAK,UAAUE,SAAQ,CAAC,EACjC,KAAK,CAAC,WAAW,IAAI,WAAW,MAAM,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA,EACE,OAAO,SAAmB,CAAC,UAAU,iBAAiB,UAAU,CAAC;AACrE,CAAC;AACD,eAAsB,eAAe,OAA4B;AAC/D,QAAM,SAAS,MAAQ,QAAM,aAAa,KAAK;AAC/C,SAAO;AACT;AAKA,eAAsB,aAAa,OAA4B;AAC7D,QAAM,SAAS,MAAM,eAAe,KAAK;AACzC,QAAM,OAAO,MAAM,mBAAmB,MAAM;AAC5C,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,UAAU;AAAA,EAC5B;AACA,MAAI,KAAK,SAAS,aAAa;AAC7B,UAAM,OAAO,OAAO,OAAO,KAAK,MAAM,CAAC;AACvC,UAAM,iBAAiB,KAAK;AAE5B,aAAS,IAAI,GAAG,IAAI,eAAe,QAAQ,KAAK,GAAG;AACjD,YAAM,QAAQ,eAAe,CAAC;AAC9B,YAAM,OAAO,eAAe,IAAI,CAAC;AACjC,YAAM,QAAQ,eAAe,IAAI,CAAC;AAClC,YAAM,MAAM,eAAe,IAAI,CAAC;AAChC,qBAAe,CAAC,IAAI;AACpB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAI;AACxB,qBAAe,IAAI,CAAC,IAAK,KAAa,gBAAgB,QAAQ;AAAA,IAChE;AACA,UAAM,SAASD,OAAM,gBAAgB;AAAA,MACnC,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC,EAAE,YAAY,CAAC;AAChB,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,WAAW,MAAM,SAAS,gBAAgB,MAAM,SAAS,cAAc;AACrE,UAAM,OAAO,MAAM,WAAW;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,UAAM,SAASA,OAAM,KAAK,MAAM;AAAA,MAC9B,KAAK;AAAA,QACH,OAAO,KAAK;AAAA,QACZ,QAAQ,KAAK;AAAA,QACb,UAAU;AAAA,MACZ;AAAA,IACF,CAAC;AACD,WAAO,EAAE,MAAM,aAAa,KAAK,OAAO;AAAA,EAC1C,OAAO;AACL,UAAM,SAASA,OAAM,MAAM;AAC3B,WAAO,EAAE,MAAM,KAAK,MAAM,KAAK,OAAO;AAAA,EACxC;AACF;;;ACnFA,OAAOE,YAAW;;;ACAlB,OAAOC,YAAW;;;ALWlB,YAAYC,QAAO;AACnB,IAAM,oBAAsB,UAAO;AAAA,EACjC,SAAW;AAAA,IACP;AAAA,MACE,SAAM;AAAA,QACJ;AAAA,UACE,UAAO;AAAA,UACP,aAAU,CAAC,UAAU;AAAA,YACrB,KAAK;AAAA,YACL,MAAM;AAAA,YACN,OAAO;AAAA,YACP,QAAQ;AAAA,UACV,EAAE;AAAA,QACJ;AAAA,QACE,UAAO;AAAA,UACP,MAAQ,UAAO;AAAA,UACf,OAAS,UAAO;AAAA,UAChB,KAAO,UAAO;AAAA,UACd,QAAU,UAAO;AAAA,QACnB,CAAC;AAAA,MACH,CAAC;AAAA,MACD,EAAE,KAAK,IAAI,OAAO,IAAI
,MAAM,IAAI,QAAQ,GAAG;AAAA,IAC7C;AAAA,EACF;AAAA,EACA,YAAc,YAAW,SAAM,CAAG,QAAO,UAAO,CAAC,CAAC,CAAC,GAAG,IAAI;AAAA;AAE5D,CAAC;AAED,gBAAgB;AAAA,EACd;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AACjB,CAAC;AAED,eAAe,QAEb,OACA,UAA2B,CAAC,GAC5B;AACA,QAAM,iBAAmB,SAAM,mBAAmB,OAAO;AAGzD,MAAI,EAAE,IAAI,IAAI,MAAM,aAAa,KAAK;AACtC,QAAM,WAAW,MAAM,IAAI,SAAS;AACpC,QAAM,UAAU,KAAK,IAAI,SAAS,OAAQ,SAAS,MAAO;AAC1D,MAAI,UAAU,eAAe,YAAY;AACvC,QAAI,QAAQ,SAAS,QAAS,SAAS;AACvC,YAAQ,QAAQ,IAAI,IAAI,QAAQ;AAChC,UAAM,IAAI,OAAO;AAAA,MACf,OAAO,KAAK;AAAA,QACV,YAAY,SAAS,QACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,MACA,QAAQ,KAAK;AAAA,QACX,YAAY,SAAS,SACjB,eAAe,aACf,QAAQ,eAAe;AAAA,MAC7B;AAAA,IACF,CAAC;AAAA,EACH;AACA,QAAM,IAAI,OAAO,EAAE,GAAG,eAAe,SAAS,YAAY,OAAO,CAAC;AAClE,QAAM,IAAI,YAAY,CAAC;AACvB,SAAO,KAAK,OAAO,GAAU;AAC/B;AACO,IAAM,MAAN,cAAkB,QAAQ;AAAA,EAC/B,aAAsB,OAAO,UAA8B,CAAC,GAAG;AAC7D,UAAM,MAAM,MAAM,QAAQ,OAAO,OAAO;AACxC,QAAI,QAAQ,gBAAgB;AAC1B,YAAMC,IAAG,MAAMC,MAAK,UAAU,QAAQ,cAAc,GAAG;AAAA,QACrD,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,IAAC,IAAY,UAAU,QAAQ,KAAK,GAAG;AACvC,WAAO;AAAA,EACT;AACF;;;AMzFO,IAAM,cAAc;AAAA,EACzB;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KA
AK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AAAA,EACA;AAAA,IACE,OAAO;AAAA,IACP,KAAK;AAAA,IACL,KAAK;AAAA,IACL,KAAK;AAAA,IACL,MAAM;AAAA,IACN,KAAK;AAAA,EACP;AACF;",
|
|
6
|
+
"names": ["path", "sharp", "filePath", "fs", "path", "fs", "path", "sharp", "filePath", "sharp", "sharp", "v", "fs", "path"]
|
|
7
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@shenghuabi/knowledge",
|
|
3
|
+
"version": "1.0.21",
|
|
4
|
+
"description": "知识库",
|
|
5
|
+
"author": "wszgrcy",
|
|
6
|
+
"sideEffects": false,
|
|
7
|
+
"peerDependencies": {
|
|
8
|
+
"@cyia/vfs2": "^1.1.26",
|
|
9
|
+
"handlebars": "^4.7.8",
|
|
10
|
+
"lru-cache": "^11.2.1",
|
|
11
|
+
"rfdc": "^1.4.1",
|
|
12
|
+
"rxjs": "^7.8.2",
|
|
13
|
+
"static-injector": "^6.1.2",
|
|
14
|
+
"yaml": "^2.7.0",
|
|
15
|
+
"valibot": "^1.1.0",
|
|
16
|
+
"uuid": ">=11.1.0",
|
|
17
|
+
"magic-string": "^0.30.18",
|
|
18
|
+
"lodash-es": "^4.17.21",
|
|
19
|
+
"html-to-text": "^9.0.5",
|
|
20
|
+
"fastq": "^1.19.1",
|
|
21
|
+
"sharp": "0.34.2",
|
|
22
|
+
"@cyia/dl": "^1.0.15",
|
|
23
|
+
"@cyia/external-call": "^1.0.35"
|
|
24
|
+
},
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"html-entities": "^2.6.0",
|
|
27
|
+
"@qdrant/qdrant-js": "1.15.1",
|
|
28
|
+
"@cyia/mdict-reader": "^1.0.9",
|
|
29
|
+
"@langchain/community": "^1.1.1",
|
|
30
|
+
"@langchain/core": "1.1.8",
|
|
31
|
+
"@langchain/textsplitters": "^1.0.1",
|
|
32
|
+
"@xhmikosr/decompress-tarbz2": "^8.0.2",
|
|
33
|
+
"chardet": "^2.1.0",
|
|
34
|
+
"d3-dsv": "^2.0.0",
|
|
35
|
+
"decompress": "^4.2.1",
|
|
36
|
+
"domutils": "^3.2.2",
|
|
37
|
+
"epub2": "^3.0.2",
|
|
38
|
+
"file-type": "^21.0.0",
|
|
39
|
+
"graphology": "^0.26.0",
|
|
40
|
+
"graphology-types": "^0.24.8",
|
|
41
|
+
"iconv-lite": "^0.7.0",
|
|
42
|
+
"mammoth": "^1.9.0",
|
|
43
|
+
"officeparser": "^4.2.0",
|
|
44
|
+
"pdf-parse": "^1.1.1",
|
|
45
|
+
"srt-parser-2": "^1.2.3",
|
|
46
|
+
"heic-decode": "^2.1.0",
|
|
47
|
+
"@gutenye/ocr-common": "^1.4.8",
|
|
48
|
+
"bmp-js": "^0.1.0",
|
|
49
|
+
"onnxruntime-node": "1.20.1",
|
|
50
|
+
"@huggingface/transformers": "3.7.2",
|
|
51
|
+
"xlsx": "^0.18.5",
|
|
52
|
+
"pdfjs-dist": "^5.4.449"
|
|
53
|
+
},
|
|
54
|
+
"overrides": {
|
|
55
|
+
"@huggingface/transformers": {
|
|
56
|
+
"onnxruntime-node": {
|
|
57
|
+
".": "1.20.1"
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"exports": {
|
|
62
|
+
"./embedding": {
|
|
63
|
+
"types": "./embedding/index.d.ts",
|
|
64
|
+
"import": "./embedding.mjs"
|
|
65
|
+
},
|
|
66
|
+
"./file-parser": {
|
|
67
|
+
"types": "./file-parser/index.d.ts",
|
|
68
|
+
"import": "./file-parser.mjs"
|
|
69
|
+
},
|
|
70
|
+
"./knowledge": {
|
|
71
|
+
"types": "./knowledge/index.d.ts",
|
|
72
|
+
"import": "./knowledge.mjs"
|
|
73
|
+
},
|
|
74
|
+
"./qdrant": {
|
|
75
|
+
"types": "./qdrant/index.d.ts",
|
|
76
|
+
"import": "./qdrant.mjs"
|
|
77
|
+
},
|
|
78
|
+
"./util": {
|
|
79
|
+
"types": "./util/index.d.ts",
|
|
80
|
+
"import": "./util.mjs"
|
|
81
|
+
},
|
|
82
|
+
"./image": {
|
|
83
|
+
"types": "./image/index.d.ts",
|
|
84
|
+
"import": "./image.mjs"
|
|
85
|
+
},
|
|
86
|
+
"./ocr": {
|
|
87
|
+
"types": "./ocr/index.d.ts",
|
|
88
|
+
"import": "./ocr.mjs"
|
|
89
|
+
},
|
|
90
|
+
"./worker/ocr": {
|
|
91
|
+
"types": "./worker/ocr/index.d.ts",
|
|
92
|
+
"import": "./worker/ocr.mjs"
|
|
93
|
+
},
|
|
94
|
+
"./worker/text2vec": {
|
|
95
|
+
"types": "./worker/text2vec/index.d.ts",
|
|
96
|
+
"import": "./worker/text2vec.mjs"
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
"publishConfig": {
|
|
100
|
+
"access": "public"
|
|
101
|
+
},
|
|
102
|
+
"repository": {
|
|
103
|
+
"url": "https://github.com/wszgrcy/shb-knowledge"
|
|
104
|
+
}
|
|
105
|
+
}
|