@shenghuabi/knowledge 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/embedding/embedding.service.d.ts +4 -0
- package/embedding/index.d.ts +2 -0
- package/embedding/type.d.ts +12 -0
- package/embedding.mjs +61 -0
- package/embedding.mjs.map +7 -0
- package/file-parser/const.d.ts +20 -0
- package/file-parser/dict/dict-format/dsl/dsl-parse.service.d.ts +6 -0
- package/file-parser/dict/dict-format/dsl/dsl.format.d.ts +1 -0
- package/file-parser/dict/dict-format/mdict-parse.service.d.ts +20 -0
- package/file-parser/dict/dict-format/stardict-parse.service.d.ts +12 -0
- package/file-parser/dict/dict-format/yaml-parse.service.d.ts +25 -0
- package/file-parser/dict/dict.service.d.ts +10 -0
- package/file-parser/dict/index.d.ts +2 -0
- package/file-parser/dict/type.d.ts +24 -0
- package/file-parser/document-file-parser.service.d.ts +15 -0
- package/file-parser/document-loader/pdf-img.loader.d.ts +8 -0
- package/file-parser/document-loader/xlsx.loader.d.ts +6 -0
- package/file-parser/file-parser.service.d.ts +13 -0
- package/file-parser/index.d.ts +6 -0
- package/file-parser/text-analyse.d.ts +1 -0
- package/file-parser/text-parser.d.ts +3 -0
- package/file-parser/vl-parser/markdown.parser.d.ts +8 -0
- package/file-parser.mjs +850 -0
- package/file-parser.mjs.map +7 -0
- package/image/convert.d.ts +25 -0
- package/image/extract.d.ts +2 -0
- package/image/image-metadata.d.ts +2 -0
- package/image/index.d.ts +3 -0
- package/image.mjs +134 -0
- package/image.mjs.map +7 -0
- package/knowledge/article/article.knowledge.service.d.ts +53 -0
- package/knowledge/article/define/config.d.ts +60 -0
- package/knowledge/article/define/index.d.ts +2 -0
- package/knowledge/article/define/payload.d.ts +16 -0
- package/knowledge/article/index.d.ts +2 -0
- package/knowledge/common/common.knowledge.service.d.ts +240 -0
- package/knowledge/common/define/base.d.ts +7 -0
- package/knowledge/common/define/chunk.d.ts +14 -0
- package/knowledge/common/define/embedding.d.ts +5 -0
- package/knowledge/common/define/index.d.ts +3 -0
- package/knowledge/common/index.d.ts +1 -0
- package/knowledge/common/query.d.ts +7 -0
- package/knowledge/const.d.ts +95 -0
- package/knowledge/define/index.d.ts +245 -0
- package/knowledge/dict/define/config.d.ts +68 -0
- package/knowledge/dict/define/index.d.ts +1 -0
- package/knowledge/dict/dict.knowledge.service.d.ts +67 -0
- package/knowledge/graph/const.d.ts +20 -0
- package/knowledge/graph/define/config.d.ts +169 -0
- package/knowledge/graph/define/define.d.ts +402 -0
- package/knowledge/graph/define/index.d.ts +2 -0
- package/knowledge/graph/define/query.d.ts +14 -0
- package/knowledge/graph/graph.handle.service.d.ts +28 -0
- package/knowledge/graph/graph.knowledge.service.d.ts +40 -0
- package/knowledge/graph/graph.local.service.d.ts +85 -0
- package/knowledge/graph/graph.query.service.d.ts +160 -0
- package/knowledge/graph/graph.service.d.ts +24 -0
- package/knowledge/graph/graph.util.service.d.ts +31 -0
- package/knowledge/graph/type.d.ts +11 -0
- package/knowledge/graph/util/format-attr.d.ts +48 -0
- package/knowledge/graph/util/graph-util.d.ts +5 -0
- package/knowledge/graph/util.d.ts +1 -0
- package/knowledge/graph/vecotr-format.d.ts +11 -0
- package/knowledge/index.d.ts +17 -0
- package/knowledge/knowledge.manager.service.d.ts +42 -0
- package/knowledge/knowledge.util.service.d.ts +21 -0
- package/knowledge/normal/define/config.d.ts +60 -0
- package/knowledge/normal/define/index.d.ts +1 -0
- package/knowledge/normal/normal.knowledge.service.d.ts +49 -0
- package/knowledge/template.format.d.ts +6 -0
- package/knowledge/type.d.ts +28 -0
- package/knowledge.mjs +2856 -0
- package/knowledge.mjs.map +7 -0
- package/ocr/FileUtils.d.ts +4 -0
- package/ocr/ImageRaw.d.ts +11 -0
- package/ocr/index.d.ts +2 -0
- package/ocr/model-config.d.ts +8 -0
- package/ocr/ocr.d.ts +29 -0
- package/ocr.mjs +351 -0
- package/ocr.mjs.map +7 -0
- package/package.json +105 -0
- package/qdrant/index.d.ts +3 -0
- package/qdrant/qdrant-client.service.d.ts +396 -0
- package/qdrant/qdrant-server.service.d.ts +21 -0
- package/qdrant/type.d.ts +18 -0
- package/qdrant/util.d.ts +1 -0
- package/qdrant.mjs +274 -0
- package/qdrant.mjs.map +7 -0
- package/util/batch-queue.d.ts +6 -0
- package/util/cache-queue.d.ts +10 -0
- package/util/clone.d.ts +1 -0
- package/util/embedding-queue.d.ts +3 -0
- package/util/get-hash.d.ts +2 -0
- package/util/html-to-text/index.d.ts +5 -0
- package/util/index.d.ts +10 -0
- package/util/is-truthy.d.ts +1 -0
- package/util/log.service.d.ts +6 -0
- package/util/promise.d.ts +5 -0
- package/util/type.d.ts +1 -0
- package/util/uniq-object-key.d.ts +1 -0
- package/util.mjs +219 -0
- package/util.mjs.map +7 -0
- package/worker/custom-cache.d.ts +28 -0
- package/worker/ocr/index.d.ts +17 -0
- package/worker/ocr.mjs +75 -0
- package/worker/ocr.mjs.map +7 -0
- package/worker/reranker.mjs +180 -0
- package/worker/reranker.mjs.map +7 -0
- package/worker/set-transformers-config.d.ts +19 -0
- package/worker/text2vec/index.d.ts +9 -0
- package/worker/text2vec.mjs +194 -0
- package/worker/text2vec.mjs.map +7 -0
package/file-parser.mjs
ADDED
|
@@ -0,0 +1,850 @@
|
|
|
1
|
+
// packages/file-parser/file-parser.service.ts
|
|
2
|
+
import { fileTypeFromBuffer } from "file-type";
|
|
3
|
+
import { inject as inject2, RootStaticInjectOptions as RootStaticInjectOptions2 } from "static-injector";
|
|
4
|
+
import { path as path3 } from "@cyia/vfs2";
|
|
5
|
+
|
|
6
|
+
// packages/file-parser/text-analyse.ts
|
|
7
|
+
import { analyse } from "chardet";
|
|
8
|
+
/**
 * Score a chardet analysis entry for ranking.
 * Base score is chardet's confidence; Chinese-language candidates get a
 * bonus so that, on near-ties, GB18030 (+3) beats Big5 (+2) beats any
 * other zh encoding (+1).
 * @param {{ lang?: string, name: string, confidence: number }} item - one chardet match
 * @returns {number} confidence plus language/encoding bonus
 */
function resultWeight(item) {
  let bonus = 0;
  if (item.lang === "zh") {
    bonus = item.name === "GB18030" ? 3 : item.name === "Big5" ? 2 : 1;
  }
  return item.confidence + bonus;
}
|
|
20
|
+
/**
 * Detect the most likely text encoding of a buffer using chardet,
 * weighting candidates with resultWeight so Chinese encodings win ties.
 * @param {Uint8Array} buffer - raw bytes to analyse
 * @returns {string} encoding name usable by TextDecoder
 */
function textAnalyse(buffer) {
  const results = analyse(buffer);
  if (!results.length) {
    // chardet returned no candidates (e.g. empty buffer); the original
    // code would throw TypeError on [0].name — fall back to UTF-8 instead.
    return "UTF-8";
  }
  // copy before sorting: Array.prototype.sort mutates, and we don't own
  // the array chardet hands back
  return [...results].sort((a, b) => resultWeight(b) - resultWeight(a))[0].name;
}
|
|
23
|
+
|
|
24
|
+
// packages/file-parser/text-parser.ts
|
|
25
|
+
/**
 * Decode a byte buffer to text using the encoding detected by textAnalyse.
 * @param {Uint8Array} buffer - raw bytes to decode
 * @param {{ path?: string }} [metadata] - optional source info used in the error message
 * @returns {string} decoded text
 * @throws {Error} when strict decoding fails; the original decode error is
 *   preserved via `cause` (previously it was silently discarded)
 */
function bufferDecodeToText(buffer, metadata) {
  const type = textAnalyse(buffer);
  // fatal: true makes invalid byte sequences throw instead of emitting U+FFFD
  const decoder2 = new TextDecoder(type, { fatal: true });
  try {
    return decoder2.decode(buffer);
  } catch (error) {
    throw new Error(`尝试使用 ${type} 编码解析失败;${metadata?.path ?? ""}`, {
      cause: error
    });
  }
}
|
|
34
|
+
|
|
35
|
+
// packages/file-parser/const.ts
|
|
36
|
+
import { InjectionToken } from "static-injector";
|
|
37
|
+
// DI tokens for the file-parser package (static-injector InjectionToken).
// FileParserToken: optional multi-parser list consumed by FileParserService,
// ordered by each parser's `priority`.
var FileParserToken = new InjectionToken("FileParser");
// ImageParserToken: callback `(filePath, pageName, pngBuffer)` used by
// pdfImageLoader to turn rendered page images into text content.
var ImageParserToken = new InjectionToken("ImageParserToken");
// DocumentParserConfigToken: factory returning parser options (e.g.
// `pdfAsImage`), read by DocumentFileParserService and pdfImageLoader.
var DocumentParserConfigToken = new InjectionToken("DocumentParserConfigToken");
|
|
40
|
+
|
|
41
|
+
// packages/file-parser/document-file-parser.service.ts
|
|
42
|
+
import { inject, Injector, RootStaticInjectOptions } from "static-injector";
|
|
43
|
+
import { EPubLoader } from "@langchain/community/document_loaders/fs/epub";
|
|
44
|
+
import { DocxLoader } from "@langchain/community/document_loaders/fs/docx";
|
|
45
|
+
import { CSVLoader } from "@langchain/community/document_loaders/fs/csv";
|
|
46
|
+
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
|
|
47
|
+
import { PPTXLoader } from "@langchain/community/document_loaders/fs/pptx";
|
|
48
|
+
import { SRTLoader } from "@langchain/community/document_loaders/fs/srt";
|
|
49
|
+
import { Blob } from "node:buffer";
|
|
50
|
+
import { path as path2 } from "@cyia/vfs2";
|
|
51
|
+
|
|
52
|
+
// packages/file-parser/document-loader/xlsx.loader.ts
|
|
53
|
+
/**
 * Convert an XLSX workbook buffer into one document per sheet.
 * Each sheet is flattened to CSV text; the sheet name becomes the title.
 * @param {Buffer|Uint8Array} buffer - workbook bytes
 * @returns {Promise<{ pageContent: string, metadata: { title: string } }[]>}
 */
async function xlsxLoader(buffer) {
  // lazy import keeps the heavy xlsx dependency out of the startup path
  const { read, utils } = await import("xlsx");
  const workbook = read(buffer);
  const documents = [];
  for (const name of workbook.SheetNames) {
    const sheet = workbook.Sheets[name];
    documents.push({
      pageContent: utils.sheet_to_csv(sheet),
      metadata: { title: name }
    });
  }
  return documents;
}
|
62
|
+
|
|
63
|
+
// packages/file-parser/document-loader/pdf-img.loader.ts
|
|
64
|
+
import { path } from "@cyia/vfs2";
|
|
65
|
+
/**
 * Render each page of a PDF to a PNG via pdfjs + @napi-rs/canvas and run the
 * injected image parser (OCR/vision model) over every page image.
 * @param {Buffer|ArrayBuffer} buffer - raw PDF bytes
 * @param {string} filePath - source path, used for naming and passed to the image parser
 * @param {Injector} injector - static-injector instance providing ImageParserToken
 *   and DocumentParserConfigToken
 * @returns {Promise<{ pageContent: string, metadata: { title: string }, parseTo: any }[]>}
 *   one entry per page, in page order
 */
async function pdfImageLoader(buffer, filePath, injector) {
  let canvas = await import("@napi-rs/canvas");
  // pdfjs expects browser globals; polyfill them once from @napi-rs/canvas
  // without clobbering existing globals
  if (!globalThis.DOMMatrix) {
    globalThis.DOMMatrix = canvas.DOMMatrix;
  }
  if (!globalThis.ImageData) {
    globalThis.ImageData = canvas.ImageData;
  }
  if (!globalThis.Path2D) {
    globalThis.Path2D = canvas.Path2D;
  }
  const { getDocument } = await import("pdfjs-dist");
  let pdf = await getDocument(new Uint8Array(buffer)).promise;
  let list = [];
  let metadata = await pdf.getMetadata();
  const fileName = path.basename(filePath, path.extname(filePath));
  // prefer the embedded PDF title; fall back to the file name
  let title = metadata.info["Title"] || fileName;
  let imageParser = injector.get(ImageParserToken);
  let documentParserConfig = injector.get(DocumentParserConfigToken);
  // pdfjs page numbers are 1-based
  for (let i = 1; i <= pdf.numPages; i++) {
    const page = await pdf.getPage(i);
    const viewport = page.getViewport({
      // render scale from config; 1 = native page size
      scale: documentParserConfig().pdfAsImage?.viewPortOptions?.scale ?? 1
    });
    const canvasEl = canvas.createCanvas(viewport.width, viewport.height);
    const ctx = canvasEl.getContext("2d");
    await page.render({
      canvasContext: ctx,
      viewport,
      canvas: canvasEl
    }).promise;
    const image = canvasEl.toBuffer("image/png");
    // pages are parsed sequentially — presumably to bound memory/OCR load;
    // NOTE(review): could be parallelized if the image parser allows it
    let result = await imageParser(filePath, `${fileName}-${i}`, image);
    list.push({
      pageContent: result.content,
      metadata: { title },
      parseTo: result.parseTo
    });
  }
  return list;
}
|
|
106
|
+
|
|
107
|
+
// packages/file-parser/document-file-parser.service.ts
|
|
108
|
+
/**
 * Default document parser: dispatches on file extension / detected file type
 * to the matching langchain loader (SRT, PPTX/ODF, PDF, CSV, DOCX, XLSX,
 * EPUB) and normalizes the result to `{ title, content, parseTo? }[]`.
 * Returns undefined for unsupported types so FileParserService can fall
 * through to plain-text decoding.
 */
var DocumentFileParserService = class extends RootStaticInjectOptions {
  // optional config factory; controls e.g. whether PDFs are rendered as images
  #documentParser = inject(DocumentParserConfigToken, { optional: true });
  #injector = inject(Injector);
  /**
   * Parse a document buffer.
   * @param filePath used for extension checks, titles, and the EPUB loader
   *   (which reads from disk, not from the buffer — the path must exist)
   * @param buffer raw file bytes
   * @param type file-type result ({ ext, mime }) or undefined when detection failed
   * @returns array of sections, or undefined when the type is unsupported
   * @throws Error wrapping any loader failure with file path and type info
   */
  async parse(filePath, buffer, type) {
    try {
      const baseName = path2.basename(filePath, path2.extname(filePath));
      const blob = new Blob([buffer]);
      // .srt has no magic bytes, so match on the extension
      if (filePath.endsWith(".srt")) {
        const instance = new SRTLoader(blob);
        const result = await instance.load();
        return this.#formatResult(result, baseName);
      } else if (type?.ext === "pptx" || type?.ext === "odt" || type?.ext === "odp" || type?.ext === "ods") {
        // PPTXLoader is also used for OpenDocument formats here —
        // NOTE(review): verify ODT/ODP/ODS actually parse correctly this way
        const instance = new PPTXLoader(blob);
        const result = await instance.load();
        return this.#formatResult(result, baseName);
      } else if (type?.ext === "pdf") {
        let result;
        const useImage = this.#documentParser?.().pdfAsImage?.enable;
        if (useImage) {
          // render pages to images and OCR them instead of extracting text
          result = await pdfImageLoader(buffer, filePath, this.#injector);
        } else {
          const instance = new PDFLoader(blob);
          result = await instance.load();
        }
        return this.#formatResult(result, baseName);
      } else if (filePath.endsWith(".csv")) {
        const instance = new CSVLoader(blob);
        const result = await instance.load();
        return this.#formatResult(result, baseName);
      } else if (type?.ext === "docx") {
        const instance = new DocxLoader(blob);
        const result = await instance.load();
        return this.#formatResult(result, baseName);
      } else if (type?.ext === "xlsx") {
        const result = await xlsxLoader(buffer);
        return this.#formatResult(result, baseName);
      } else if (type?.ext === "epub") {
        // EPubLoader reads from the path, not the buffer
        const instance = new EPubLoader(filePath);
        const result = await instance.load();
        // EPUB: one section per non-empty chapter; unnamed chapters get a
        // placeholder title ("[未命名]" = unnamed) with a 1-based index
        return result.filter((item) => !!item.pageContent).map((item, i) => ({
          title: item.metadata["chapter"] || `[未命名]${i + 1}`,
          content: item.pageContent
        }));
      } else {
        // unsupported type: signal "not handled" to the caller
        return;
      }
    } catch (error) {
      throw new Error(
        `文件:[${filePath}]解析失败;类型[${JSON.stringify(type) ?? ""}]`,
        {
          cause: error
        }
      );
    }
  }
  /**
   * Normalize loader output to `{ title, content, parseTo? }[]`.
   * Single-document results keep the fallback title as-is; multi-document
   * results are trimmed, empty pages dropped, and untitled pages suffixed
   * with their (post-filter) index.
   */
  #formatResult(list, title) {
    if (list.length === 1) {
      return [
        {
          title: list[0].metadata?.["title"] ?? title,
          content: list[0].pageContent.trim(),
          parseTo: list[0].parseTo
        }
      ];
    }
    return list.map((item) => ({ ...item, pageContent: item.pageContent?.trim() })).filter((item) => !!item.pageContent).map((item, index) => ({
      title: item.metadata?.["title"] ?? `${title}-${index}`,
      content: item.pageContent,
      parseTo: item.parseTo
    }));
  }
};
|
|
180
|
+
|
|
181
|
+
// packages/file-parser/file-parser.service.ts
|
|
182
|
+
/**
 * Entry point for file parsing: detects the file type, tries each registered
 * parser in priority order, and falls back to decoding the buffer as text.
 */
var FileParserService = class extends RootStaticInjectOptions2 {
  priority = 0;
  // parsers provided via FileParserToken, sorted ascending by priority;
  // when none are provided, fall back to the built-in document parser.
  // Note: `slice()` copies before the in-place sort.
  #parserList = inject2(FileParserToken, { optional: true })?.slice().sort((a, b) => a.priority - b.priority) ?? [
    inject2(DocumentFileParserService)
  ];
  /**
   * Parse a file into sections.
   * @param fileName path/name of the file (used for type hints and titles)
   * @param buffer raw file bytes
   * @returns `{ title, content, parseTo? }[]` from the first parser that
   *   returns a result, or a single plain-text section as fallback
   * @throws Error when file-type detection itself fails
   */
  async parse(fileName, buffer) {
    const baseName = path3.basename(fileName, path3.extname(fileName));
    let type;
    try {
      // magic-byte detection; may be undefined for pure-text formats
      type = await fileTypeFromBuffer(buffer);
    } catch (error) {
      throw new Error(`文件:[${fileName}]类型解析失败`, {
        cause: error
      });
    }
    for (const item of this.#parserList) {
      const result = await item.parse(fileName, buffer, type);
      if (result) {
        return result;
      }
    }
    // no parser handled it: treat the buffer as encoded text
    return [
      { title: baseName, content: bufferDecodeToText(new Uint8Array(buffer)) }
    ];
  }
  /** Supports workflow file reads: collapse all sections into one result. */
  parseOne(fileName, buffer) {
    return this.parse(fileName, buffer).then((list) => {
      return {
        content: list?.map((item) => item.content).join("\n"),
        // parseTo is taken from the first section only
        parseTo: list[0].parseTo
      };
    });
  }
};
|
|
217
|
+
|
|
218
|
+
// packages/file-parser/dict/dict-format/stardict-parse.service.ts
|
|
219
|
+
import * as fs from "fs/promises";
|
|
220
|
+
import * as zlib from "zlib";
|
|
221
|
+
import * as util from "util";
|
|
222
|
+
import { RootStaticInjectOptions as RootStaticInjectOptions3 } from "static-injector";
|
|
223
|
+
import decompress from "decompress";
|
|
224
|
+
import decompressTarbz from "@xhmikosr/decompress-tarbz2";
|
|
225
|
+
import { tmpdir } from "os";
|
|
226
|
+
import { v4 } from "uuid";
|
|
227
|
+
import { path as path4 } from "@cyia/vfs2";
|
|
228
|
+
import { decode } from "html-entities";
|
|
229
|
+
// Shared UTF-8 decoder for StarDict .dict payload chunks (see StardictParseService).
var decoder = new util.TextDecoder("utf-8");
|
|
230
|
+
/**
 * Parser for StarDict dictionaries (.ifo metadata + .idx index + .dict.dz
 * gzip-compressed definitions), either as sibling files on disk or packed
 * in a .tar.bz2 archive.
 */
var StardictParseService = class extends RootStaticInjectOptions3 {
  /**
   * Load a StarDict dictionary.
   * @param filePathList path to the .ifo file or a tar.bz2 archive
   *   (despite the name it is a single path, not a list)
   * @returns `{ info, dataListGenerator }` where the generator yields
   *   `{ word, content }` entries lazily
   * @throws Error when a tar.bz2 archive is missing any of the three parts
   */
  async parse(filePathList) {
    const filePath = filePathList;
    let infoFileContent;
    let indexFileBuffer;
    let dictFileBuffer;
    if (filePath.endsWith("tar.bz2")) {
      // extract the archive into a unique temp dir and pick out the parts
      const dir = path4.join(tmpdir(), v4());
      const list2 = await decompress(filePath, dir, {
        plugins: [decompressTarbz()]
      });
      for (const item of list2) {
        if (item.path.endsWith("ifo")) {
          infoFileContent = item.data.toString();
        } else if (item.path.endsWith("idx")) {
          indexFileBuffer = item.data;
        } else if (item.path.endsWith("dict.dz")) {
          dictFileBuffer = item.data;
        }
      }
      if (!infoFileContent || !indexFileBuffer || !dictFileBuffer) {
        throw new Error(`解压后未找到指定内容,文件夹:${dir}`);
      }
    } else {
      // sibling files: derive the common base name and read each part
      const fileName = path4.basename(filePath).replace(/\.(ifo|tar\.bz)$/, "");
      infoFileContent = (await fs.readFile(
        path4.resolve(path4.dirname(filePath), `${fileName}.ifo`)
      )).toString();
      indexFileBuffer = await fs.readFile(
        path4.resolve(path4.dirname(filePath), `${fileName}.idx`)
      );
      dictFileBuffer = await fs.readFile(
        path4.resolve(path4.dirname(filePath), `${fileName}.dict.dz`)
      );
    }
    const [info, list] = await Promise.all([
      this.#getInfo(infoFileContent).then((obj) => {
        // StarDict .ifo uses "bookname"; normalize to "name"
        obj["name"] ??= obj["bookname"];
        return obj;
      }),
      this.#getIndex(indexFileBuffer)
    ]);
    return {
      info,
      dataListGenerator: () => this.#getDict(dictFileBuffer, list)
    };
  }
  /**
   * Parse the .ifo file: one `key=value` per line; lines without exactly
   * one key and one value are dropped.
   */
  async #getInfo(content) {
    const list = content.split(/\r\n|\n\r|\n|\r/).filter(Boolean).map((item) => item.split("=").filter(Boolean)).filter((list2) => list2.length === 2);
    return list.reduce(
      (obj, item) => {
        obj[item[0]] = item[1];
        return obj;
      },
      {}
    );
  }
  /**
   * Parse the .idx file: repeated records of
   * NUL-terminated UTF-8 word + 32-bit BE offset + 32-bit BE size.
   * HTML entities in words (e.g. "&#x27;") are decoded.
   * NOTE(review): assumes every word is NUL-terminated; a truncated index
   * would make indexOf return -1 and misparse — confirm inputs are trusted.
   */
  async #getIndex(buffer) {
    const indexData = [];
    let index = 0;
    while (index < buffer.length) {
      const beg = index;
      index = buffer.indexOf("\0", beg);
      let word = buffer.toString("utf-8", beg, index);
      if (word.includes("&#")) {
        word = decode(word);
      }
      index++;
      const offset = buffer.readUInt32BE(index);
      index += 4;
      const size = buffer.readUInt32BE(index);
      index += 4;
      indexData.push([word, offset, size]);
    }
    return indexData;
  }
  /**
   * Decompress the .dict.dz payload (gzip) once, then yield each entry by
   * slicing it out at the offsets recorded in the index. Entries whose
   * decoded content trims to empty are skipped.
   */
  async *#getDict(buffer, indexData) {
    const rawdata = new Uint8Array(buffer);
    const buffer_1 = zlib.gunzipSync(rawdata);
    const rawdata_1 = new Uint8Array(buffer_1);
    for (const [word, offset, size] of indexData) {
      const chunk = rawdata_1.slice(offset, offset + size);
      const decoded = (decoder.decode(chunk) ?? "").trim();
      if (!decoded) {
        continue;
      }
      yield {
        word,
        content: decoded
      };
    }
  }
};
|
|
323
|
+
|
|
324
|
+
// packages/file-parser/dict/dict-format/mdict-parse.service.ts
|
|
325
|
+
import { Mdict } from "@cyia/mdict-reader";
|
|
326
|
+
import { existsSync } from "fs";
|
|
327
|
+
import { RootStaticInjectOptions as RootStaticInjectOptions4 } from "static-injector";
|
|
328
|
+
import { path as path5, createNormalizeVfs } from "@cyia/vfs2";
|
|
329
|
+
/**
 * Parser for MDict dictionaries: reads definitions from the .mdx file and,
 * after the dictionary is saved, extracts bundled assets from the sibling
 * .mdd file into the asset folder.
 */
var MdictParseService = class extends RootStaticInjectOptions4 {
  /**
   * Open an MDict dictionary.
   * @param filePathList path to the dictionary file (single path despite the name)
   * @param options optional `{ regCode, userId }` unlock credentials; only
   *   passed to Mdict.build when both fields are present
   * @returns `{ info, dataListGenerator, afterSave }`
   */
  async parse(filePathList, options) {
    let passCode;
    if (options?.regCode && options.userId) {
      passCode = options;
    }
    const filePath = filePathList;
    const fileName = path5.basename(filePath, path5.extname(filePath));
    const mdxFilePath = path5.resolve(path5.dirname(filePath), `${fileName}.mdx`);
    const mdxInstance = await Mdict.build(mdxFilePath, passCode);
    const mddFilePath = path5.resolve(path5.dirname(filePath), `${fileName}.mdd`);
    const dataInfo = mdxInstance.getDictInfo().mdx;
    return {
      // TODO: more of the dictionary metadata could be preserved here
      info: {
        // MDict's placeholder title means "untitled"; use the file name instead
        name: dataInfo.Title === "Title (No HTML code allowed)" ? fileName : dataInfo.Title,
        fileName
      },
      dataListGenerator: () => this.wordListGenerator(mdxInstance),
      // extract .mdd assets (images/audio) into assetFolder, writing in
      // batches of up to 20 concurrent writes
      afterSave: async (assetFolder) => {
        let pendList = [];
        let sum = 0;
        // await the current batch; any write failure aborts with a combined error
        const waitingWrite = async () => {
          // successful writes resolve to undefined, failures to {name,message}
          const result = (await Promise.all(pendList)).filter(Boolean);
          if (result.length) {
            throw new Error(
              result.map((item) => `文件[${item.name}]写入失败,${item?.message}`).join("\n")
            );
          } else {
            sum += pendList.length;
          }
          pendList = [];
        };
        if (existsSync(mddFilePath)) {
          const vfs = createNormalizeVfs({ dir: assetFolder });
          const mddInstance = await Mdict.build(mddFilePath, passCode);
          const wordGenerator = mddInstance.load();
          for await (const item of wordGenerator) {
            pendList.push(
              mddInstance.getMddAsset(item).then(
                (buffer) => vfs.writeFile(path5.join(assetFolder, item.word), buffer).then(() => void 0).catch((reason) => ({
                  name: item.word,
                  message: reason
                }))
              )
            );
            if (pendList.length >= 20) {
              await waitingWrite();
            }
          }
          // flush the final partial batch
          if (pendList.length) {
            await waitingWrite();
          }
        }
      }
    };
  }
  /**
   * Lazily yield `{ word, content }` for every entry in the .mdx file,
   * skipping entries whose definition trims to empty.
   */
  async *wordListGenerator(mdxInstance) {
    const list = mdxInstance.load();
    for await (const item of list) {
      const definition = (await mdxInstance.getDefinition(item) || "").trim();
      if (!definition) {
        continue;
      }
      yield {
        content: definition,
        word: item.word
      };
    }
  }
};
|
|
400
|
+
|
|
401
|
+
// packages/file-parser/dict/dict.service.ts
|
|
402
|
+
import { inject as inject3, Injector as Injector2, RootStaticInjectOptions as RootStaticInjectOptions7 } from "static-injector";
|
|
403
|
+
import fs4 from "fs";
|
|
404
|
+
import { path as path7 } from "@cyia/vfs2";
|
|
405
|
+
|
|
406
|
+
// packages/file-parser/dict/dict-format/yaml-parse.service.ts
|
|
407
|
+
import { path as path6 } from "@cyia/vfs2";
|
|
408
|
+
import * as fs2 from "fs/promises";
|
|
409
|
+
import { parse } from "yaml";
|
|
410
|
+
import { RootStaticInjectOptions as RootStaticInjectOptions5 } from "static-injector";
|
|
411
|
+
import * as v from "valibot";
|
|
412
|
+
// valibot schema for a YAML dictionary file: a `list` of entries, each with
// the headword, its content, and optional free-form extra fields.
var YamlDefine = v.object({
  list: v.array(
    v.object({
      word: v.string(),
      content: v.string(),
      extra: v.optional(v.record(v.string(), v.any()))
    })
  )
});
|
|
421
|
+
/**
 * Parser for YAML dictionary files conforming to YamlDefine.
 * Reads and validates the file eagerly, then exposes the entries through
 * an async generator for a uniform interface with the other parsers.
 */
var YamlDictParseService = class extends RootStaticInjectOptions5 {
  /**
   * @param filePathList path to the YAML file (single path despite the name)
   * @returns `{ info, dataListGenerator }`; the generator yields each
   *   validated `{ word, content, extra? }` entry
   * @throws valibot parse error when the file doesn't match YamlDefine
   */
  async parse(filePathList) {
    const filePath = filePathList;
    const extension = path6.extname(filePath);
    const raw = await fs2.readFile(filePath, { encoding: "utf-8" });
    // validate up front so malformed files fail here, not mid-iteration
    const validated = v.parse(YamlDefine, parse(raw));
    return {
      info: {
        fileName: path6.basename(filePath),
        name: path6.basename(filePath, extension)
      },
      dataListGenerator: async function* () {
        yield* validated.list;
      }
    };
  }
};
|
|
440
|
+
|
|
441
|
+
// packages/file-parser/dict/dict.service.ts
|
|
442
|
+
import { LRUCache } from "lru-cache";
|
|
443
|
+
|
|
444
|
+
// packages/file-parser/dict/dict-format/dsl/dsl-parse.service.ts
|
|
445
|
+
import fs3 from "fs";
|
|
446
|
+
import chardet from "chardet";
|
|
447
|
+
|
|
448
|
+
// packages/file-parser/dict/dict-format/dsl/dsl.format.ts
|
|
449
|
+
// DSL markup constants.
// Tags treated as inline text styling: bold, italic, underline, color.
var inlineStatusList = ["b", "i", "u", "c"];
// Media file extensions recognized inside [s]...[/s] resource tags.
var sImageList = ["bmp", "pcx", "dcx", "jpg", "tif"];
var sSoundList = ["wav"];
var sVideoList = ["avi", "webm", "mp4"];
|
|
453
|
+
// Base class for DSL parse nodes: records where in the input the node's
// markup began and a type discriminator set by each subclass
// ("inline" | "block" | "text").
var NodeItem = class {
  // offset into the input string where this node started
  start;
  // attr: Record<string, any> = {};
  type;
};
|
|
458
|
+
/**
 * An opened inline styling tag ([b]/[i]/[u]/[c]) together with the CSS
 * declarations it contributes to enclosed text.
 */
var InlineNode = class extends NodeItem {
  type = "inline";
  attr;
  tag;
  /**
   * @param tag  inline tag name
   * @param attr CSS property map applied while this tag is open
   */
  constructor(tag, attr) {
    super();
    this.tag = tag;
    this.attr = attr;
  }
};
|
|
468
|
+
/**
 * An opened block-level tag; `fn` renders the text collected between the
 * opening and closing tag into its HTML form when the tag closes.
 */
var BlockNode = class extends NodeItem {
  type = "block";
  fn;
  tag;
  /**
   * @param tag block tag name
   * @param fn  (text: string) => string renderer invoked on tag close
   */
  constructor(tag, fn) {
    super();
    this.fn = fn;
    this.tag = tag;
  }
};
|
|
478
|
+
/**
 * A run of plain text plus the inline styles active when it was scanned.
 * `renderText()` materializes `renderedText` from `text` and `style`.
 */
var TextNode = class extends NodeItem {
  type = "text";
  text;
  style = {};
  /**
   * @param text raw (still escaped) source text for this run
   */
  constructor(text) {
    super();
    this.text = text;
  }
  // HTML produced by renderText(), later joined into the final output
  renderedText;
  /**
   * Un-escape \[ and \] sequences and, when any inline style attributes
   * were collected, wrap the text in a styled <span>.
   */
  renderText() {
    const unescaped = this.text.replace(/\\\[/g, "[").replace(/\\]/g, "]");
    const css = Object.entries(this.style)
      .map(([property, value]) => `${property}: ${value}`)
      .join(";");
    this.renderedText = css.trim().length
      ? `<span style="${css}">${unescaped}</span>`
      : `${unescaped}`;
  }
};
|
|
497
|
+
/**
 * Single-pass renderer that converts DSL dictionary markup to HTML.
 * Scans the input with two sticky regexes (opening vs closing tags),
 * maintaining stacks of open inline styles and open block tags, and
 * accumulating rendered TextNodes that are joined at the end.
 */
var DslFormat = class {
  // currently-open inline style tags (most recent first)
  #inlineStatus = [];
  // currently-open block tags (most recent first)
  #blockStatus = [];
  // rendered text runs accumulated in input order
  #textNodeList = [];
  #input;
  // scan position in #input
  #start = 0;
  constructor(input) {
    this.#input = input;
  }
  /**
   * Render the input to HTML. Falsy input (empty string/undefined) is
   * returned unchanged.
   */
  run() {
    if (!this.#input) {
      return this.#input;
    }
    this.#scan();
    return this.#textNodeList.map((item) => item.renderedText).join("");
  }
  /**
   * Main scan loop. At each step, find the next unescaped opening tag
   * `[tag ...]` and closing tag `[/tag]`; whichever comes first wins.
   * Text before the match becomes a TextNode carrying the active inline
   * styles. Loop ends when neither regex matches.
   * NOTE(review): text after the last tag is not emitted as a trailing
   * TextNode — confirm whether inputs always end with a closing tag.
   */
  #scan() {
    let startResult;
    let endResult;
    // `d` flag exposes match indices; `[m<digit>]` captures the left
    // padding level for margin tags separately from other tag names
    const startRegexp = /(?<!\\)\[(?!\/)(?<tagName>m(?<leftPadding>[0-9])|[^\]\s]+)((\s+)(?<attr>[^\]]+))?]/dg;
    const endRegexp = /(?<!\\)\[\/(?<tagName>[^\]]+)]/dg;
    while (true) {
      // both regexes are stateful (/g): resume from the current position
      startRegexp.lastIndex = this.#start;
      endRegexp.lastIndex = this.#start;
      const currentText = this.#input;
      startResult = startRegexp.exec(currentText);
      endResult = endRegexp.exec(currentText);
      if (startResult && (!endResult || startResult.indices[0][0] < endResult.indices[0][0])) {
        // opening tag comes first: flush preceding text, push tag status
        const tagName = startResult.groups["tagName"];
        const matchStart = startResult.indices[0][0];
        this.#createTextNode(this.#start, matchStart);
        this.#createStatusNode(
          // all [m0]-[m9] variants collapse to the "m" tag
          tagName.startsWith("m") ? "m" : tagName,
          matchStart,
          startResult.groups["leftPadding"] || startResult.groups["attr"]
        );
        this.#start = startResult.indices[0][1];
      } else {
        if (endResult) {
          // closing tag: flush preceding text, merge/unwind the tag
          const tagName = endResult.groups["tagName"];
          const matchStart = endResult.indices[0][0];
          this.#createTextNode(this.#start, matchStart);
          this.#mergeTextNode(tagName, matchStart);
          this.#removeStatus(tagName);
          this.#start = endResult.indices[0][1];
        } else {
          break;
        }
      }
    }
  }
  /** Create a plain text node for input[start, end) with the active inline styles. */
  #createTextNode(start, end) {
    if (start < end) {
      const node = new TextNode(this.#input.slice(start, end));
      node.start = start;
      // merge all open inline styles; later-opened tags win on conflicts
      node.style = this.#inlineStatus.reduce(
        (obj, item) => ({ ...obj, ...item.attr }),
        {}
      );
      node.renderText();
      this.#textNodeList.push(node);
      return node;
    }
  }
  /**
   * Push an open-tag status node. Inline tags map to CSS declarations;
   * all other tags become BlockNodes whose renderer runs on tag close.
   * @param attrStr for [m] tags the padding digit, otherwise the raw attr text
   */
  #createStatusNode(name, start, attrStr) {
    if (inlineStatusList.includes(name)) {
      attrStr; // no-op; attrStr only matters for the "c" case below
      let attr = {};
      switch (name) {
        case "c":
          // [c color] — bare [c] contributes no style
          if (attrStr) {
            attr = { color: attrStr };
          }
          break;
        case "b":
          attr = { "font-weight": "bolder" };
          break;
        case "u":
          attr = { "text-decoration": "#f00 wavy underline" };
          break;
        case "i":
          attr = { "font-style": "italic" };
          break;
      }
      const node = new InlineNode(name, attr);
      node.start = start;
      this.#inlineStatus.unshift(node);
    } else {
      const node = new BlockNode(name, (text) => {
        switch (name) {
          case "m": {
            // margin: attrStr is the digit captured from [m0]-[m9]
            return `<div style="padding-left: ${attrStr}em;">${text}</div>`;
          }
          case "url": {
            return `<a href="${text}">${text}</a>`;
          }
          case "ref": {
            // cross-reference to another dictionary entry
            return `<a href="entry://${text}">${text}</a>`;
          }
          case "sub":
          case "sup": {
            {
              return `<${name}>${text}</${name}>`;
            }
          }
          case "s": {
            // media resource: pick markup by extension, with fallback
            // sources for transcoded variants; unknown media renders empty
            if (sImageList.some((item) => text.endsWith(item))) {
              const baseName = text.slice(0, text.lastIndexOf("."));
              return `<picture><source srcset="${text}"><source srcset="${baseName}.webp"><source srcset="${baseName}.jpg"><img src="${text}"></picture>`;
            } else if (sSoundList.some((item) => text.endsWith(item))) {
              return `<figure><audio controls src="${text}"></audio></figure>`;
            } else if (sVideoList.some((item) => text.endsWith(item))) {
              const baseName = text.slice(0, text.lastIndexOf("."));
              return `<video controls style="width:100%"><source src="${text}"/><source src="${baseName}.mp4"/><source src="${baseName}.webm"/></video>`;
            }
            return ``;
          }
          default: {
            // unrecognized block tags render as a bare span
            return `<span>${text}</span>`;
          }
        }
      });
      node.start = start;
      this.#blockStatus.unshift(node);
    }
  }
  /**
   * On block-tag close: collapse every TextNode emitted since the tag
   * opened into a single pre-rendered TextNode produced by the block's
   * renderer.
   */
  #mergeTextNode(name, end) {
    const blockStart = this.#blockStatus.find((item) => item.tag === name);
    if (blockStart) {
      const start = blockStart.start;
      let tempIndex = -1;
      // walk back to the first TextNode created at/after the tag opened
      for (let i = this.#textNodeList.length - 1; i > -1; i--) {
        const textNode = this.#textNodeList[i];
        if (textNode.start < start) {
          break;
        } else {
          tempIndex = i;
        }
      }
      if (tempIndex !== -1) {
        const delNodeList = this.#textNodeList.slice(tempIndex);
        const mergeText = blockStart.fn(
          delNodeList.map((item) => item.renderedText).join("")
        );
        this.#textNodeList = this.#textNodeList.slice(0, tempIndex);
        const node = new TextNode(mergeText);
        node.start = start;
        // already rendered by the block fn: bypass renderText()
        node.renderedText = node.text;
        this.#textNodeList.push(node);
      }
    }
  }
  /** Pop the most recent matching tag from the relevant status stack. */
  #removeStatus(name) {
    if (inlineStatusList.includes(name)) {
      const index = this.#inlineStatus.findIndex((item) => item.tag === name);
      this.#inlineStatus.splice(index, 1);
    } else {
      const index = this.#blockStatus.findIndex((item) => item.tag === name);
      this.#blockStatus.splice(index, 1);
    }
  }
};
|
|
660
|
+
/**
 * Render a DSL-markup definition string to HTML.
 * Falsy input is returned unchanged (DslFormat.run short-circuits).
 * @param {string} input - raw DSL markup
 * @returns {string} rendered HTML
 */
function dslFormat(input) {
  return new DslFormat(input).run();
}
|
|
664
|
+
|
|
665
|
+
// packages/file-parser/dict/dict-format/dsl/dsl-parse.service.ts
|
|
666
|
+
import { RootStaticInjectOptions as RootStaticInjectOptions6 } from "static-injector";
|
|
667
|
+
import { basename } from "path";
|
|
668
|
+
// Service wrapper that parses a .dsl dictionary file and exposes its
// metadata plus a lazy generator over the dictionary entries.
var DslParseService = class extends RootStaticInjectOptions6 {
  /**
   * Parse a DSL dictionary file.
   * @param filePath path to the .dsl file
   * @param options currently unused; kept for interface parity with the
   *   other dictionary parser services
   * @returns `{ info, dataListGenerator }` where `dataListGenerator`
   *   lazily yields the dictionary entries
   */
  async parse(filePath, options) {
    const parser = new DslParse(filePath);
    await parser.init();
    const info = {
      ...parser.info,
      // "NAME" comes from the `#NAME` metadata header of the DSL file.
      name: parser.info["NAME"],
      fileName: basename(filePath, ".dsl")
    };
    return {
      info,
      dataListGenerator: () => parser.generate()
    };
  }
};
|
|
682
|
+
// Streaming parser for ABBYY Lingvo DSL dictionary files: reads the
// `#KEY value` metadata header, then lazily yields
// { word, content, htmlContent } entries.
var DslParse = class {
  filePath;
  // Full decoded file text (leading whitespace/BOM stripped).
  fileContent;
  // Cursor into fileContent; advanced by readMetadata()/readEntry().
  start = 0;
  // Parsed `#KEY value` header fields.
  info = {};
  // NOTE(review): never read or written in this class — presumably a
  // leftover; kept so the public shape is unchanged.
  wordStart;
  constructor(filePath) {
    this.filePath = filePath;
  }
  /**
   * Read and decode the file, consume the metadata header, and prime the
   * entry reader (the first readEntry() call only records the first
   * headword; it yields nothing yet).
   */
  async init() {
    const buffer = await fs3.promises.readFile(this.filePath);
    // Sniff the encoding from the first 500 bytes. DSL files are
    // conventionally UTF-16LE, which is also the fallback.
    const sample = new Uint8Array(buffer.subarray(0, 500));
    let encoding = chardet.detect(sample) ?? "UTF-16LE";
    // BUG FIX: chardet can report charsets Buffer cannot decode (e.g.
    // "ISO-8859-1", "windows-1252"), which made toString() throw. Fall
    // back to the same UTF-16LE default instead of crashing.
    if (!Buffer.isEncoding(encoding)) {
      encoding = "UTF-16LE";
    }
    this.fileContent = buffer.toString(encoding).trimStart();
    while (this.readMetadata()) {
      // consume all leading `#KEY value` lines
    }
    this.readEntry();
  }
  /**
   * Try to read one `#KEY value` metadata line at the current cursor.
   * Values are JSON-decoded when possible (DSL quotes them), otherwise
   * kept verbatim. Returns true while header lines remain.
   */
  readMetadata() {
    const regexp = /^#(NAME|INDEX_LANGUAGE|CONTENTS_LANGUAGE|SOUND_DICTIONARY|SOURCE_CODE_PAGE)\s+(.+)/dgm;
    regexp.lastIndex = this.start;
    // fileContent was already trimStart()-ed in init(); the previous
    // per-call re-trim allocated an identical string on every iteration.
    const result = regexp.exec(this.fileContent);
    if (result) {
      try {
        // Header values are usually quoted, e.g. #NAME "My Dictionary".
        this.info[result[1]] = JSON.parse(result[2]);
      } catch (error) {
        this.info[result[1]] = result[2];
      }
      this.start = result.indices[0][1];
      return true;
    }
    return false;
  }
  // Headword recorded by the previous readEntry() call; an entry is only
  // emitted once the NEXT headword (or EOF) bounds its definition body.
  lastWordInfo;
  /** Lazily yield every remaining dictionary entry. */
  async *generate() {
    let result;
    while (result = this.readEntry()) {
      yield result;
    }
  }
  /**
   * Advance to the next headword (a line starting with non-whitespace)
   * and return the entry for the PREVIOUS headword, whose definition is
   * the indented text between the two. Returns undefined until a full
   * entry is available and after the file is exhausted.
   */
  readEntry() {
    const regexp = /^[^\s]+/dgm;
    regexp.lastIndex = this.start;
    const result = regexp.exec(this.fileContent);
    if (result) {
      this.start = result.indices[0][1];
      const previous = this.lastWordInfo;
      this.lastWordInfo = {
        word: result[0],
        range: result.indices[0]
      };
      if (previous) {
        // Definition body: everything between the previous headword and
        // this one, with per-line leading indentation stripped.
        const content = this.fileContent.slice(previous.range[1], this.lastWordInfo.range[0]).replace(/^\s+/gm, "");
        return {
          word: previous.word,
          content,
          htmlContent: dslFormat(content)
        };
      }
    } else {
      if (this.lastWordInfo) {
        // EOF: flush the final pending entry.
        const content = this.fileContent.slice(this.lastWordInfo.range[1]).replace(/^\s+/gm, "");
        const entry = {
          word: this.lastWordInfo.word,
          content,
          htmlContent: dslFormat(content)
        };
        this.lastWordInfo = void 0;
        return entry;
      }
    }
    return void 0;
  }
};
|
|
756
|
+
|
|
757
|
+
// packages/file-parser/dict/dict.service.ts
|
|
758
|
+
// Facade over the individual dictionary-format parsers, with a small
// TTL cache so repeated operations on the same file reuse one parse.
var DictService = class extends RootStaticInjectOptions7 {
  #injector = inject3(Injector2);
  // Keep at most 2 parsed dictionaries alive, for 120s each.
  #cache = new LRUCache({
    max: 2,
    ttl: 12e4
  });
  /** Resolve the display name of a dictionary (metadata name, else file name). */
  async getDictName(input) {
    const dict = await this.#getDictResolve(input);
    return dict.info.name || dict.info.fileName;
  }
  /**
   * Resolve (and cache) the parser result for a dictionary file,
   * dispatching on `input.type`.
   * @throws Error when no parser exists for the given type
   */
  async #getDictResolve(input) {
    // BUG FIX: has()+get() is racy with a TTL cache — the entry can
    // expire between the two calls and get() would then hand callers
    // `undefined`. A single get() is the documented idiom and is also
    // what refreshes LRU recency.
    const cached = this.#cache.get(input.filePath);
    if (cached !== undefined) {
      return cached;
    }
    const { filePath, type } = input;
    let result;
    if (type === "stardict") {
      result = await this.#injector.get(StardictParseService).parse(filePath);
    } else if (type === "mdict") {
      result = await this.#injector.get(MdictParseService).parse(filePath, input);
    } else if (type === "dsl") {
      result = await this.#injector.get(DslParseService).parse(filePath);
    } else if (type === "yaml") {
      result = await this.#injector.get(YamlDictParseService).parse(filePath);
    } else {
      throw new Error(`没有找到${type}字典对应解析器`);
    }
    this.#cache.set(input.filePath, result);
    return result;
  }
  /**
   * Import a dictionary into `dir`, writing any embedded assets to
   * `dir/assets`, and return the entry generator.
   */
  // TODO (translated from original): the first step needs to be split
  // into 3 operations.
  // TODO (translated from original): the ordering here is inverted — this
  // should call the base layer and let the base layer do the allocation.
  async importDict(name, dir, input) {
    const baseName = path7.basename(
      input.filePath,
      path7.extname(input.filePath)
    );
    const result = await this.#getDictResolve(input);
    // NOTE(review): `name` is resolved but never used below — confirm
    // whether the caller was meant to receive it.
    name ||= result.info.name || baseName;
    const assetFolder = path7.join(dir, "assets");
    if (result.afterSave) {
      await fs4.promises.mkdir(assetFolder, {
        recursive: true
      });
      await result.afterSave(assetFolder);
    }
    return result.dataListGenerator();
  }
};
|
|
809
|
+
|
|
810
|
+
// packages/file-parser/vl-parser/markdown.parser.ts
|
|
811
|
+
import { lexer } from "marked";
|
|
812
|
+
import MS from "magic-string";
|
|
813
|
+
// Matches VL-model placeholders of the form `<!-- Image(x, y, w, h) -->`
// or `<!-- Table(x, y, w, h) -->`; the `d` flag is unused by matchAll
// here but harmless, and matchAll never mutates this shared regex's
// lastIndex.
var regex = /<!--\s*(Image|Table)\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)\s*-->/dg;
/**
 * Replace every Image/Table placeholder comment in a VL-produced
 * markdown string with the content fetched for that region.
 * @param content raw model output (possibly wrapped in a markdown fence)
 * @param options.imageGet async (model, [x, y, w, h]) => region content
 * @returns the markdown with placeholders substituted
 */
async function vlMarkdownParser(content, options) {
  const mdContent = getMdLexer(content);
  const ms = new MS(mdContent);
  for (const item of mdContent.matchAll(regex)) {
    // item[2..5] are the numeric (x, y, w, h) coordinates captured above.
    const imageData = await options.imageGet("qwen3-vl", [
      +item[2],
      +item[3],
      +item[4],
      +item[5]
    ]);
    // BUG FIX: imageData was previously awaited and then discarded — the
    // placeholder was replaced with an empty string, throwing away the
    // fetched content. Splice the retrieved data in instead (falling back
    // to removal when the getter yields nothing).
    ms.update(
      item.index,
      item.index + item[0].length,
      imageData ?? ``
    );
  }
  return ms.toString();
}
|
|
833
|
+
// VL models sometimes wrap their entire answer in a single ```markdown
// (or bare) code fence; unwrap it so the placeholder comments inside can
// be matched. Anything else is returned untouched.
function getMdLexer(content) {
  const tokens = lexer(content);
  if (tokens.length !== 1) {
    return content;
  }
  const [only] = tokens;
  const isBareOrMarkdownFence = only.type === "code" && (!only.lang || only.lang === "markdown");
  return isBareOrMarkdownFence ? only.text : content;
}
|
|
840
|
+
export {
|
|
841
|
+
DictService,
|
|
842
|
+
DocumentFileParserService,
|
|
843
|
+
DocumentParserConfigToken,
|
|
844
|
+
FileParserService,
|
|
845
|
+
FileParserToken,
|
|
846
|
+
ImageParserToken,
|
|
847
|
+
bufferDecodeToText,
|
|
848
|
+
vlMarkdownParser
|
|
849
|
+
};
|
|
850
|
+
//# sourceMappingURL=file-parser.mjs.map
|