@shenghuabi/knowledge 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/embedding/embedding.service.d.ts +4 -0
  2. package/embedding/index.d.ts +2 -0
  3. package/embedding/type.d.ts +12 -0
  4. package/embedding.mjs +61 -0
  5. package/embedding.mjs.map +7 -0
  6. package/file-parser/const.d.ts +20 -0
  7. package/file-parser/dict/dict-format/dsl/dsl-parse.service.d.ts +6 -0
  8. package/file-parser/dict/dict-format/dsl/dsl.format.d.ts +1 -0
  9. package/file-parser/dict/dict-format/mdict-parse.service.d.ts +20 -0
  10. package/file-parser/dict/dict-format/stardict-parse.service.d.ts +12 -0
  11. package/file-parser/dict/dict-format/yaml-parse.service.d.ts +25 -0
  12. package/file-parser/dict/dict.service.d.ts +10 -0
  13. package/file-parser/dict/index.d.ts +2 -0
  14. package/file-parser/dict/type.d.ts +24 -0
  15. package/file-parser/document-file-parser.service.d.ts +15 -0
  16. package/file-parser/document-loader/pdf-img.loader.d.ts +8 -0
  17. package/file-parser/document-loader/xlsx.loader.d.ts +6 -0
  18. package/file-parser/file-parser.service.d.ts +13 -0
  19. package/file-parser/index.d.ts +6 -0
  20. package/file-parser/text-analyse.d.ts +1 -0
  21. package/file-parser/text-parser.d.ts +3 -0
  22. package/file-parser/vl-parser/markdown.parser.d.ts +8 -0
  23. package/file-parser.mjs +850 -0
  24. package/file-parser.mjs.map +7 -0
  25. package/image/convert.d.ts +25 -0
  26. package/image/extract.d.ts +2 -0
  27. package/image/image-metadata.d.ts +2 -0
  28. package/image/index.d.ts +3 -0
  29. package/image.mjs +134 -0
  30. package/image.mjs.map +7 -0
  31. package/knowledge/article/article.knowledge.service.d.ts +53 -0
  32. package/knowledge/article/define/config.d.ts +60 -0
  33. package/knowledge/article/define/index.d.ts +2 -0
  34. package/knowledge/article/define/payload.d.ts +16 -0
  35. package/knowledge/article/index.d.ts +2 -0
  36. package/knowledge/common/common.knowledge.service.d.ts +240 -0
  37. package/knowledge/common/define/base.d.ts +7 -0
  38. package/knowledge/common/define/chunk.d.ts +14 -0
  39. package/knowledge/common/define/embedding.d.ts +5 -0
  40. package/knowledge/common/define/index.d.ts +3 -0
  41. package/knowledge/common/index.d.ts +1 -0
  42. package/knowledge/common/query.d.ts +7 -0
  43. package/knowledge/const.d.ts +95 -0
  44. package/knowledge/define/index.d.ts +245 -0
  45. package/knowledge/dict/define/config.d.ts +68 -0
  46. package/knowledge/dict/define/index.d.ts +1 -0
  47. package/knowledge/dict/dict.knowledge.service.d.ts +67 -0
  48. package/knowledge/graph/const.d.ts +20 -0
  49. package/knowledge/graph/define/config.d.ts +169 -0
  50. package/knowledge/graph/define/define.d.ts +402 -0
  51. package/knowledge/graph/define/index.d.ts +2 -0
  52. package/knowledge/graph/define/query.d.ts +14 -0
  53. package/knowledge/graph/graph.handle.service.d.ts +28 -0
  54. package/knowledge/graph/graph.knowledge.service.d.ts +40 -0
  55. package/knowledge/graph/graph.local.service.d.ts +85 -0
  56. package/knowledge/graph/graph.query.service.d.ts +160 -0
  57. package/knowledge/graph/graph.service.d.ts +24 -0
  58. package/knowledge/graph/graph.util.service.d.ts +31 -0
  59. package/knowledge/graph/type.d.ts +11 -0
  60. package/knowledge/graph/util/format-attr.d.ts +48 -0
  61. package/knowledge/graph/util/graph-util.d.ts +5 -0
  62. package/knowledge/graph/util.d.ts +1 -0
  63. package/knowledge/graph/vecotr-format.d.ts +11 -0
  64. package/knowledge/index.d.ts +17 -0
  65. package/knowledge/knowledge.manager.service.d.ts +42 -0
  66. package/knowledge/knowledge.util.service.d.ts +21 -0
  67. package/knowledge/normal/define/config.d.ts +60 -0
  68. package/knowledge/normal/define/index.d.ts +1 -0
  69. package/knowledge/normal/normal.knowledge.service.d.ts +49 -0
  70. package/knowledge/template.format.d.ts +6 -0
  71. package/knowledge/type.d.ts +28 -0
  72. package/knowledge.mjs +2856 -0
  73. package/knowledge.mjs.map +7 -0
  74. package/ocr/FileUtils.d.ts +4 -0
  75. package/ocr/ImageRaw.d.ts +11 -0
  76. package/ocr/index.d.ts +2 -0
  77. package/ocr/model-config.d.ts +8 -0
  78. package/ocr/ocr.d.ts +29 -0
  79. package/ocr.mjs +351 -0
  80. package/ocr.mjs.map +7 -0
  81. package/package.json +105 -0
  82. package/qdrant/index.d.ts +3 -0
  83. package/qdrant/qdrant-client.service.d.ts +396 -0
  84. package/qdrant/qdrant-server.service.d.ts +21 -0
  85. package/qdrant/type.d.ts +18 -0
  86. package/qdrant/util.d.ts +1 -0
  87. package/qdrant.mjs +274 -0
  88. package/qdrant.mjs.map +7 -0
  89. package/util/batch-queue.d.ts +6 -0
  90. package/util/cache-queue.d.ts +10 -0
  91. package/util/clone.d.ts +1 -0
  92. package/util/embedding-queue.d.ts +3 -0
  93. package/util/get-hash.d.ts +2 -0
  94. package/util/html-to-text/index.d.ts +5 -0
  95. package/util/index.d.ts +10 -0
  96. package/util/is-truthy.d.ts +1 -0
  97. package/util/log.service.d.ts +6 -0
  98. package/util/promise.d.ts +5 -0
  99. package/util/type.d.ts +1 -0
  100. package/util/uniq-object-key.d.ts +1 -0
  101. package/util.mjs +219 -0
  102. package/util.mjs.map +7 -0
  103. package/worker/custom-cache.d.ts +28 -0
  104. package/worker/ocr/index.d.ts +17 -0
  105. package/worker/ocr.mjs +75 -0
  106. package/worker/ocr.mjs.map +7 -0
  107. package/worker/reranker.mjs +180 -0
  108. package/worker/reranker.mjs.map +7 -0
  109. package/worker/set-transformers-config.d.ts +19 -0
  110. package/worker/text2vec/index.d.ts +9 -0
  111. package/worker/text2vec.mjs +194 -0
  112. package/worker/text2vec.mjs.map +7 -0
package/qdrant.mjs ADDED
@@ -0,0 +1,274 @@
1
+ // packages/qdrant/qdrant-client.service.ts
2
+ import {
3
+ computed,
4
+ inject,
5
+ Injector,
6
+ RootStaticInjectOptions
7
+ } from "static-injector";
8
+ import { QdrantClient } from "@qdrant/qdrant-js";
9
+
10
+ // packages/qdrant/type.ts
11
+ import { InjectionToken } from "static-injector";
12
+ import * as v from "valibot";
13
/**
 * Injection token for the Qdrant options. The injected value is invoked as a
 * function by its consumers to obtain the current options object.
 */
var QdrantOptionsToken = new InjectionToken("QdrantOptionsToken");

/**
 * Injection token for the start gate. Consumers wait on the injected value's
 * `promise` before creating a client.
 */
var QdrantStartToken = new InjectionToken("QdrantStart");

/**
 * Valibot schema describing the Qdrant options.
 * `host`, `port` and `version` fall back to the defaults given here when
 * omitted; `dir` and `configPath` are optional and have no default.
 */
var QdrantOptionsDefine = v.object({
  host: v.optional(v.string(), "127.0.0.1"),
  port: v.optional(v.number(), 6333),
  dir: v.optional(v.string()),
  configPath: v.optional(v.string()),
  version: v.optional(v.string(), "v1.15.4")
});
24
+
25
+ // packages/qdrant/qdrant-client.service.ts
26
/**
 * Asynchronous facade over `QdrantClient`.
 *
 * The underlying client is only constructed after the injected start promise
 * has resolved, so every method first awaits the client and then forwards its
 * arguments to the method of the same name (unless noted otherwise).
 */
var QdrantClientService = class extends RootStaticInjectOptions {
  #injector = inject(Injector);
  #qdStart$$ = inject(QdrantStartToken);
  // Promise of the client; the options are read only once the start promise resolves.
  #client$$ = computed(() => {
    const buildClient = () => {
      const readOptions = this.#injector.get(QdrantOptionsToken);
      return new QdrantClient({
        ...readOptions(),
        headers: { Connection: "Close" }
      });
    };
    return this.#qdStart$$.promise.then(buildClient);
  });

  /** Promise resolving to the raw `QdrantClient` instance. */
  get originClient() {
    return this.#client$$();
  }

  async deleteCollection(name) {
    const client = await this.#client$$();
    return client.deleteCollection(name);
  }

  async collectionExists(name) {
    const client = await this.#client$$();
    return client.collectionExists(name);
  }

  async createCollection(...args) {
    const client = await this.#client$$();
    return client.createCollection(...args);
  }

  async getCollection(...args) {
    const client = await this.#client$$();
    return client.getCollection(...args);
  }

  /** Creates a `keyword` payload index on `payloadName` and waits for completion. */
  async createPayloadKeywordIndex(collectionName, payloadName) {
    const client = await this.#client$$();
    const indexRequest = {
      wait: true,
      field_name: payloadName,
      field_schema: "keyword"
    };
    return client.createPayloadIndex(collectionName, indexRequest);
  }

  async recoverSnapshot(...args) {
    const client = await this.#client$$();
    return client.recoverSnapshot(...args);
  }

  async createSnapshot(...args) {
    const client = await this.#client$$();
    return client.createSnapshot(...args);
  }

  async getAliases(...args) {
    const client = await this.#client$$();
    return client.getAliases(...args);
  }

  async getCollectionAliases(...args) {
    const client = await this.#client$$();
    return client.getCollectionAliases(...args);
  }

  async updateCollectionAliases(...args) {
    const client = await this.#client$$();
    return client.updateCollectionAliases(...args);
  }

  /** Issues a `create_alias` action naming `collection` as `activateCollection`. */
  async setActivateCollection(collection, activateCollection) {
    const client = await this.#client$$();
    const createAlias = {
      create_alias: {
        collection_name: collection,
        alias_name: activateCollection
      }
    };
    return client.updateCollectionAliases({ actions: [createAlias] });
  }

  /** Forwards to `upsert`; the client's result is intentionally not returned. */
  async upsert(...args) {
    const client = await this.#client$$();
    await client.upsert(...args);
  }

  /** Forwards to `delete`; the client's result is intentionally not returned. */
  async delete(...args) {
    const client = await this.#client$$();
    await client.delete(...args);
  }

  async retrieve(...args) {
    const client = await this.#client$$();
    return client.retrieve(...args);
  }

  async scroll(...args) {
    const client = await this.#client$$();
    return client.scroll(...args);
  }

  async count(...args) {
    const client = await this.#client$$();
    return client.count(...args);
  }

  async searchPointGroups(...args) {
    const client = await this.#client$$();
    return client.searchPointGroups(...args);
  }

  async queryGroups(...args) {
    const client = await this.#client$$();
    return client.queryGroups(...args);
  }

  async query(...args) {
    const client = await this.#client$$();
    return client.query(...args);
  }

  async search(...args) {
    const client = await this.#client$$();
    return client.search(...args);
  }
};
115
+
116
+ // packages/qdrant/qdrant-server.service.ts
117
+ import { computed as computed2, inject as inject2, signal } from "static-injector";
118
+ import { path } from "@cyia/vfs2";
119
+ import { ExternalCallBaseService } from "@cyia/external-call";
120
+
121
+ // packages/qdrant/util.ts
122
+ function getQdrantFile() {
123
+ switch (`${process.platform}-${process.arch}`) {
124
+ case "win32-x64":
125
+ return `qdrant-x86_64-pc-windows-msvc.zip
126
+ `;
127
+ case "linux-x64":
128
+ return `qdrant-x86_64-unknown-linux-gnu.tar.gz`;
129
+ default:
130
+ break;
131
+ }
132
+ throw new Error("");
133
+ }
134
+
135
+ // packages/qdrant/qdrant-server.service.ts
136
// Executable name; the Windows build carries an `.exe` suffix.
var fileName = "qdrant" + (process.platform === "win32" ? ".exe" : "");

/**
 * Manages a local Qdrant server process: locating the executable inside the
 * configured `dir`, launching it, detecting readiness from its stdout,
 * stopping it, and downloading the executable from the `qdrant/qdrant`
 * GitHub releases.
 */
var QdrantServerService = class extends ExternalCallBaseService {
  logName = "qdrant";
  #config = inject2(QdrantOptionsToken);
  #fileName = computed2(() => fileName);
  /** Absolute path of the executable inside the configured directory. */
  startPath$$ = computed2(
    () => path.join(this.#config().dir, this.#fileName())
  );
  execPath$$ = this.startPath$$;
  checkFilePath$$ = this.startPath$$;
  /** `snapshots` sub-directory of the configured directory. */
  snapshotDir$$ = computed2(() => path.join(this.#config().dir, "snapshots"));
  /** Version string parsed from the server's stdout during startup. */
  #version;
  /** `true` once both the gRPC and HTTP listeners have been reported. */
  start$ = signal(false);
  /** In-flight or completed startup promise; cleared by `stop()` or on failure. */
  #startFinished$;

  async init() {
    await this.startup();
  }

  /**
   * Starts the server if its executable exists.
   *
   * @returns `undefined` when the executable is missing; otherwise a promise
   *   resolving to `{ instance, abortController }` once the server reports
   *   both listeners. Concurrent callers share the same promise.
   */
  async startup() {
    await this.checkExist();
    const exist = await this.exist();
    if (!exist) {
      return void 0;
    }
    if (!this.#startFinished$) {
      const pending = this.#startup();
      this.#startFinished$ = pending;
      // Do not cache a failed start: a later `startup()` call should retry
      // instead of receiving the same rejected promise forever.
      pending.catch(() => {
        if (this.#startFinished$ === pending) {
          this.#startFinished$ = void 0;
        }
      });
    }
    return this.#startFinished$;
  }

  /**
   * Spawns the process and resolves when stdout has contained both
   * "gRPC listening" and "HTTP listening".
   *
   * The launch logic runs in a separate async function whose rejection is
   * forwarded to `reject`; with an async Promise executor any error thrown
   * while preparing or spawning the process would be lost and the returned
   * promise would stay pending.
   */
  #startup() {
    return new Promise((resolve, reject) => {
      const launch = async () => {
        this.log?.info(`准备启动`);
        const { instance, abortController } = this.exec(
          this.startPath$$(),
          await this.#getOptions(),
          {
            env: await this.#getEnv(),
            extendEnv: true,
            cwd: this.#config().dir,
            reject: false
          }
        );
        this.instanceSnapshot = [instance];
        let httpEnable = false;
        let grpcEnable = false;
        instance.stdout.on("data", (data) => {
          const value = data.toString();
          // After startup, skip logging chunks that mention point-write requests.
          if (!this.start$() || !value.includes("/points?wait=false")) {
            this.log?.info(value);
          }
          if (!grpcEnable && value.includes("gRPC listening")) {
            grpcEnable = true;
          }
          if (!httpEnable && value.includes("HTTP listening")) {
            httpEnable = true;
          }
          if (!this.start$() && grpcEnable && httpEnable) {
            this.log?.info(`启动成功`);
            this.start$.set(true);
            resolve({ instance, abortController });
          }
          if (!this.#version) {
            const result = value.match(/Version:\s+([^,]+)/);
            if (result) {
              this.#version = result[1];
            }
          }
        });
      };
      launch().catch(reject);
    });
  }

  /** Environment overrides for the process: the HTTP port when one is configured. */
  async #getEnv() {
    const env = {};
    const port = this.#config().port;
    if (port) {
      env["QDRANT__SERVICE__HTTP_PORT"] = port;
    }
    return env;
  }

  /** Command-line arguments: `--config-path <configPath>` when configured. */
  async #getOptions() {
    const list = [];
    const configPath = this.#config().configPath;
    if (configPath) {
      list.push("--config-path", configPath);
    }
    return list;
  }

  /**
   * @returns The version captured from stdout during startup, or `undefined`
   *   when the executable does not exist (or no version line was seen yet).
   */
  async getVersion() {
    await this.checkExist();
    if (!this.exist$()) {
      return void 0;
    }
    return this.#version;
  }

  /** Stops the process; errors are logged and the start state is always reset. */
  stop() {
    try {
      super.stop();
    } catch (error) {
      this.log?.error(error);
    } finally {
      this.#startFinished$ = void 0;
      this.start$.set(false);
    }
  }

  /** Abort controller of the download currently in progress, if any. */
  #downloadExecAbort;

  /**
   * Stops the server and downloads the configured Qdrant version into the
   * configured directory. A download already in progress is aborted first;
   * an error from a download that was aborted is swallowed, any other error
   * is rethrown.
   *
   * @param options Optional; `progressMessage` is passed through to the downloader.
   */
  async downloadExec(options) {
    await this.stop();
    this.#downloadExecAbort?.abort();
    const ac = new AbortController();
    this.#downloadExecAbort = ac;
    try {
      await this.githubRepoDownload(
        {
          prefix: `qdrant/qdrant`,
          version: this.#config().version,
          fileName: getQdrantFile()
        },
        {
          output: this.#config().dir,
          progressMessage: options?.progressMessage,
          cleanDir: false,
          signal: ac.signal
        }
      );
    } catch (error) {
      if (ac.signal.aborted) {
        return;
      }
      throw error;
    }
  }
};
267
+ export {
268
+ QdrantClientService,
269
+ QdrantOptionsDefine,
270
+ QdrantOptionsToken,
271
+ QdrantServerService,
272
+ QdrantStartToken
273
+ };
274
+ //# sourceMappingURL=qdrant.mjs.map
package/qdrant.mjs.map ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../packages/qdrant/qdrant-client.service.ts", "../packages/qdrant/type.ts", "../packages/qdrant/qdrant-server.service.ts", "../packages/qdrant/util.ts"],
4
+ "sourcesContent": ["import {\n computed,\n inject,\n Injector,\n RootStaticInjectOptions,\n} from 'static-injector';\nimport { QdrantClient } from '@qdrant/qdrant-js';\n\nimport { QdrantOptionsToken, QdrantStartToken } from './type';\nexport class QdrantClientService extends RootStaticInjectOptions {\n #injector = inject(Injector);\n #qdStart$$ = inject(QdrantStartToken);\n #client$$ = computed(() =>\n this.#qdStart$$.promise.then(() => {\n const options = this.#injector.get(QdrantOptionsToken);\n return new QdrantClient({\n ...options(),\n headers: { Connection: 'Close' },\n });\n }),\n );\n\n get originClient() {\n return this.#client$$();\n }\n\n async deleteCollection(name: string) {\n return (await this.#client$$()).deleteCollection(name);\n }\n async collectionExists(name: string) {\n return (await this.#client$$()).collectionExists(name);\n }\n async createCollection(\n ...args: Parameters<QdrantClient['createCollection']>\n ) {\n return (await this.#client$$()).createCollection(...args);\n }\n async getCollection(...args: Parameters<QdrantClient['getCollection']>) {\n return (await this.#client$$()).getCollection(...args);\n }\n\n async createPayloadKeywordIndex(collectionName: string, payloadName: string) {\n return (await this.#client$$()).createPayloadIndex(collectionName, {\n wait: true,\n field_name: payloadName,\n field_schema: 'keyword',\n });\n }\n async recoverSnapshot(...args: Parameters<QdrantClient['recoverSnapshot']>) {\n return (await this.#client$$()).recoverSnapshot(...args);\n }\n async createSnapshot(...args: Parameters<QdrantClient['createSnapshot']>) {\n return (await this.#client$$()).createSnapshot(...args);\n }\n async getAliases(...args: Parameters<QdrantClient['getAliases']>) {\n return (await this.#client$$()).getAliases(...args);\n }\n async getCollectionAliases(\n ...args: Parameters<QdrantClient['getCollectionAliases']>\n ) {\n return (await this.#client$$()).getCollectionAliases(...args);\n }\n async 
updateCollectionAliases(\n ...args: Parameters<QdrantClient['updateCollectionAliases']>\n ) {\n return (await this.#client$$()).updateCollectionAliases(...args);\n }\n async setActivateCollection(collection: string, activateCollection: string) {\n return (await this.#client$$()).updateCollectionAliases({\n actions: [\n {\n create_alias: {\n collection_name: collection,\n alias_name: activateCollection,\n },\n },\n ],\n });\n }\n async upsert(...args: Parameters<QdrantClient['upsert']>) {\n await (await this.#client$$()).upsert(...args);\n }\n async delete(...args: Parameters<QdrantClient['delete']>) {\n await (await this.#client$$()).delete(...args);\n }\n async retrieve(...args: Parameters<QdrantClient['retrieve']>) {\n return (await this.#client$$()).retrieve(...args);\n }\n async scroll(...args: Parameters<QdrantClient['scroll']>) {\n return (await this.#client$$()).scroll(...args);\n }\n async count(...args: Parameters<QdrantClient['count']>) {\n return (await this.#client$$()).count(...args);\n }\n async searchPointGroups(\n ...args: Parameters<QdrantClient['searchPointGroups']>\n ) {\n return (await this.#client$$()).searchPointGroups(...args);\n }\n async queryGroups(...args: Parameters<QdrantClient['queryGroups']>) {\n return (await this.#client$$()).queryGroups(...args);\n }\n async query(...args: Parameters<QdrantClient['query']>) {\n return (await this.#client$$()).query(...args);\n }\n async search(...args: Parameters<QdrantClient['search']>) {\n return (await this.#client$$()).search(...args);\n }\n}\n", "import { InjectionToken, Signal } from 'static-injector';\nimport * as v from 'valibot';\nexport const QdrantOptionsDefine = v.object({\n host: v.optional(v.string(), '127.0.0.1'),\n port: v.optional(v.number(), 6333),\n dir: v.optional(v.string()),\n configPath: v.optional(v.string()),\n version: v.optional(v.string(), 'v1.15.4'),\n});\nexport type QdrantOptionsType = v.InferOutput<typeof QdrantOptionsDefine> & {\n dir: string;\n};\nexport const 
QdrantOptionsToken = new InjectionToken<Signal<QdrantOptionsType>>(\n 'QdrantOptionsToken',\n);\n\nexport const QdrantStartToken = new InjectionToken<{\n promise: Promise<void>;\n resolve: (value: void | PromiseLike<void>) => void;\n reject: (reason?: any) => void;\n}>('QdrantStart');\n", "import { computed, inject, signal } from 'static-injector';\nimport { QdrantOptionsToken } from './type';\nimport { path } from '@cyia/vfs2';\nimport { ExternalCallBaseService } from '@cyia/external-call';\nimport { getQdrantFile } from './util';\nimport type { DownloadFileOptions } from '@cyia/dl';\nconst fileName = 'qdrant' + (process.platform === 'win32' ? '.exe' : '');\ntype ExecInstance = ReturnType<ExternalCallBaseService['exec']>;\nexport class QdrantServerService extends ExternalCallBaseService {\n override logName = 'qdrant';\n #config = inject(QdrantOptionsToken);\n #fileName = computed(() => fileName);\n override startPath$$ = computed(() =>\n path.join(this.#config().dir, this.#fileName()),\n );\n override execPath$$ = this.startPath$$;\n override checkFilePath$$ = this.startPath$$;\n snapshotDir$$ = computed(() => path.join(this.#config().dir, 'snapshots'));\n /** 启动时读取 */\n #version?: string;\n start$ = signal(false);\n #startFinished$?: Promise<ReturnType<typeof this.exec>>;\n\n override async init() {\n await this.startup();\n }\n async startup() {\n await this.checkExist();\n const exist = await this.exist();\n if (!exist) {\n return undefined;\n }\n if (!this.#startFinished$) {\n this.#startFinished$ = this.#startup();\n }\n return this.#startFinished$;\n }\n #startup() {\n return new Promise<ExecInstance>(async (resolve, reject) => {\n this.log?.info(`准备启动`);\n const { instance, abortController } = this.exec(\n this.startPath$$(),\n await this.#getOptions(),\n {\n env: await this.#getEnv(),\n extendEnv: true,\n cwd: this.#config().dir,\n reject: false,\n },\n );\n this.instanceSnapshot = [instance];\n let httpEnable = false;\n let grpcEnable = false;\n 
instance.stdout.on('data', (data: Buffer) => {\n const value = data.toString();\n if (!this.start$() || !value.includes('/points?wait=false')) {\n this.log?.info(value);\n }\n if (!grpcEnable && value.includes('gRPC listening')) {\n grpcEnable = true;\n }\n if (!httpEnable && value.includes('HTTP listening')) {\n httpEnable = true;\n }\n if (!this.start$() && grpcEnable && httpEnable) {\n this.log?.info(`启动成功`);\n this.start$.set(true);\n resolve({ instance, abortController });\n }\n if (!this.#version) {\n const result = value.match(/Version:\\s+([^,]+)/);\n if (result) {\n this.#version = result[1];\n }\n }\n });\n });\n }\n async #getEnv() {\n const env: Record<string, any> = {};\n const port = this.#config().port;\n if (port) {\n env['QDRANT__SERVICE__HTTP_PORT'] = port;\n }\n return env;\n }\n async #getOptions() {\n const list: string[] = [];\n const configPath = this.#config().configPath;\n if (configPath) {\n list.push('--config-path', configPath);\n }\n return list;\n }\n\n protected override async getVersion(): Promise<string | undefined> {\n await this.checkExist();\n if (!this.exist$()) {\n return undefined;\n }\n return this.#version;\n }\n\n override stop() {\n try {\n super.stop();\n } catch (error) {\n this.log?.error(error);\n } finally {\n this.#startFinished$ = undefined;\n this.start$.set(false);\n }\n }\n #downloadExecAbort?: AbortController;\n\n async downloadExec(options?: {\n progressMessage?: DownloadFileOptions['message'];\n }) {\n await this.stop();\n this.#downloadExecAbort?.abort();\n const ac = new AbortController();\n this.#downloadExecAbort = ac;\n try {\n await this.githubRepoDownload(\n {\n prefix: `qdrant/qdrant`,\n version: this.#config().version,\n fileName: getQdrantFile(),\n },\n {\n output: this.#config().dir,\n progressMessage: options?.progressMessage,\n cleanDir: false,\n signal: ac.signal,\n },\n );\n } catch (error) {\n if (ac.signal.aborted) {\n return;\n }\n throw error;\n }\n }\n}\n", "export function getQdrantFile() 
{\n switch (`${process.platform}-${process.arch}` as const) {\n case 'win32-x64':\n return `qdrant-x86_64-pc-windows-msvc.zip\n`;\n case 'linux-x64':\n return `qdrant-x86_64-unknown-linux-gnu.tar.gz`;\n\n default:\n break;\n }\n throw new Error('');\n}\n"],
5
+ "mappings": ";AAAA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;;;ACN7B,SAAS,sBAA8B;AACvC,YAAY,OAAO;AACZ,IAAM,sBAAwB,SAAO;AAAA,EAC1C,MAAQ,WAAW,SAAO,GAAG,WAAW;AAAA,EACxC,MAAQ,WAAW,SAAO,GAAG,IAAI;AAAA,EACjC,KAAO,WAAW,SAAO,CAAC;AAAA,EAC1B,YAAc,WAAW,SAAO,CAAC;AAAA,EACjC,SAAW,WAAW,SAAO,GAAG,SAAS;AAC3C,CAAC;AAIM,IAAM,qBAAqB,IAAI;AAAA,EACpC;AACF;AAEO,IAAM,mBAAmB,IAAI,eAIjC,aAAa;;;ADXT,IAAM,sBAAN,cAAkC,wBAAwB;AAAA,EAC/D,YAAY,OAAO,QAAQ;AAAA,EAC3B,aAAa,OAAO,gBAAgB;AAAA,EACpC,YAAY;AAAA,IAAS,MACnB,KAAK,WAAW,QAAQ,KAAK,MAAM;AACjC,YAAM,UAAU,KAAK,UAAU,IAAI,kBAAkB;AACrD,aAAO,IAAI,aAAa;AAAA,QACtB,GAAG,QAAQ;AAAA,QACX,SAAS,EAAE,YAAY,QAAQ;AAAA,MACjC,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEA,IAAI,eAAe;AACjB,WAAO,KAAK,UAAU;AAAA,EACxB;AAAA,EAEA,MAAM,iBAAiB,MAAc;AACnC,YAAQ,MAAM,KAAK,UAAU,GAAG,iBAAiB,IAAI;AAAA,EACvD;AAAA,EACA,MAAM,iBAAiB,MAAc;AACnC,YAAQ,MAAM,KAAK,UAAU,GAAG,iBAAiB,IAAI;AAAA,EACvD;AAAA,EACA,MAAM,oBACD,MACH;AACA,YAAQ,MAAM,KAAK,UAAU,GAAG,iBAAiB,GAAG,IAAI;AAAA,EAC1D;AAAA,EACA,MAAM,iBAAiB,MAAiD;AACtE,YAAQ,MAAM,KAAK,UAAU,GAAG,cAAc,GAAG,IAAI;AAAA,EACvD;AAAA,EAEA,MAAM,0BAA0B,gBAAwB,aAAqB;AAC3E,YAAQ,MAAM,KAAK,UAAU,GAAG,mBAAmB,gBAAgB;AAAA,MACjE,MAAM;AAAA,MACN,YAAY;AAAA,MACZ,cAAc;AAAA,IAChB,CAAC;AAAA,EACH;AAAA,EACA,MAAM,mBAAmB,MAAmD;AAC1E,YAAQ,MAAM,KAAK,UAAU,GAAG,gBAAgB,GAAG,IAAI;AAAA,EACzD;AAAA,EACA,MAAM,kBAAkB,MAAkD;AACxE,YAAQ,MAAM,KAAK,UAAU,GAAG,eAAe,GAAG,IAAI;AAAA,EACxD;AAAA,EACA,MAAM,cAAc,MAA8C;AAChE,YAAQ,MAAM,KAAK,UAAU,GAAG,WAAW,GAAG,IAAI;AAAA,EACpD;AAAA,EACA,MAAM,wBACD,MACH;AACA,YAAQ,MAAM,KAAK,UAAU,GAAG,qBAAqB,GAAG,IAAI;AAAA,EAC9D;AAAA,EACA,MAAM,2BACD,MACH;AACA,YAAQ,MAAM,KAAK,UAAU,GAAG,wBAAwB,GAAG,IAAI;AAAA,EACjE;AAAA,EACA,MAAM,sBAAsB,YAAoB,oBAA4B;AAC1E,YAAQ,MAAM,KAAK,UAAU,GAAG,wBAAwB;AAAA,MACtD,SAAS;AAAA,QACP;AAAA,UACE,cAAc;AAAA,YACZ,iBAAiB;AAAA,YACjB,YAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACA,MAAM,UAAU,MAA0C;AACxD,WAAO,MAAM,KAAK,UAAU,GAAG,OAAO,GAAG,IAAI;AAAA,EAC/C;AAAA,EACA,MAAM,UAAU,MAA0C;AACxD,WAAO,MAAM,KAAK,UAAU,GAAG,OAAO,GAAG
,IAAI;AAAA,EAC/C;AAAA,EACA,MAAM,YAAY,MAA4C;AAC5D,YAAQ,MAAM,KAAK,UAAU,GAAG,SAAS,GAAG,IAAI;AAAA,EAClD;AAAA,EACA,MAAM,UAAU,MAA0C;AACxD,YAAQ,MAAM,KAAK,UAAU,GAAG,OAAO,GAAG,IAAI;AAAA,EAChD;AAAA,EACA,MAAM,SAAS,MAAyC;AACtD,YAAQ,MAAM,KAAK,UAAU,GAAG,MAAM,GAAG,IAAI;AAAA,EAC/C;AAAA,EACA,MAAM,qBACD,MACH;AACA,YAAQ,MAAM,KAAK,UAAU,GAAG,kBAAkB,GAAG,IAAI;AAAA,EAC3D;AAAA,EACA,MAAM,eAAe,MAA+C;AAClE,YAAQ,MAAM,KAAK,UAAU,GAAG,YAAY,GAAG,IAAI;AAAA,EACrD;AAAA,EACA,MAAM,SAAS,MAAyC;AACtD,YAAQ,MAAM,KAAK,UAAU,GAAG,MAAM,GAAG,IAAI;AAAA,EAC/C;AAAA,EACA,MAAM,UAAU,MAA0C;AACxD,YAAQ,MAAM,KAAK,UAAU,GAAG,OAAO,GAAG,IAAI;AAAA,EAChD;AACF;;;AE5GA,SAAS,YAAAA,WAAU,UAAAC,SAAQ,cAAc;AAEzC,SAAS,YAAY;AACrB,SAAS,+BAA+B;;;ACHjC,SAAS,gBAAgB;AAC9B,UAAQ,GAAG,QAAQ,QAAQ,IAAI,QAAQ,IAAI,IAAa;AAAA,IACtD,KAAK;AACH,aAAO;AAAA;AAAA,IAET,KAAK;AACH,aAAO;AAAA,IAET;AACE;AAAA,EACJ;AACA,QAAM,IAAI,MAAM,EAAE;AACpB;;;ADNA,IAAM,WAAW,YAAY,QAAQ,aAAa,UAAU,SAAS;AAE9D,IAAM,sBAAN,cAAkC,wBAAwB;AAAA,EACtD,UAAU;AAAA,EACnB,UAAUC,QAAO,kBAAkB;AAAA,EACnC,YAAYC,UAAS,MAAM,QAAQ;AAAA,EAC1B,cAAcA;AAAA,IAAS,MAC9B,KAAK,KAAK,KAAK,QAAQ,EAAE,KAAK,KAAK,UAAU,CAAC;AAAA,EAChD;AAAA,EACS,aAAa,KAAK;AAAA,EAClB,kBAAkB,KAAK;AAAA,EAChC,gBAAgBA,UAAS,MAAM,KAAK,KAAK,KAAK,QAAQ,EAAE,KAAK,WAAW,CAAC;AAAA;AAAA,EAEzE;AAAA,EACA,SAAS,OAAO,KAAK;AAAA,EACrB;AAAA,EAEA,MAAe,OAAO;AACpB,UAAM,KAAK,QAAQ;AAAA,EACrB;AAAA,EACA,MAAM,UAAU;AACd,UAAM,KAAK,WAAW;AACtB,UAAM,QAAQ,MAAM,KAAK,MAAM;AAC/B,QAAI,CAAC,OAAO;AACV,aAAO;AAAA,IACT;AACA,QAAI,CAAC,KAAK,iBAAiB;AACzB,WAAK,kBAAkB,KAAK,SAAS;AAAA,IACvC;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EACA,WAAW;AACT,WAAO,IAAI,QAAsB,OAAO,SAAS,WAAW;AAC1D,WAAK,KAAK,KAAK,MAAM;AACrB,YAAM,EAAE,UAAU,gBAAgB,IAAI,KAAK;AAAA,QACzC,KAAK,YAAY;AAAA,QACjB,MAAM,KAAK,YAAY;AAAA,QACvB;AAAA,UACE,KAAK,MAAM,KAAK,QAAQ;AAAA,UACxB,WAAW;AAAA,UACX,KAAK,KAAK,QAAQ,EAAE;AAAA,UACpB,QAAQ;AAAA,QACV;AAAA,MACF;AACA,WAAK,mBAAmB,CAAC,QAAQ;AACjC,UAAI,aAAa;AACjB,UAAI,aAAa;AACjB,eAAS,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC3C,cAAM,QAAQ,KAAK,SAAS;AAC5B,YAAI,CAAC,KAAK,OAAO,KAAK,CAAC,MAAM,SAAS,oBAAoB,GAAG;AAC3D,eAAK,KAAK,KAAK,K
AAK;AAAA,QACtB;AACA,YAAI,CAAC,cAAc,MAAM,SAAS,gBAAgB,GAAG;AACnD,uBAAa;AAAA,QACf;AACA,YAAI,CAAC,cAAc,MAAM,SAAS,gBAAgB,GAAG;AACnD,uBAAa;AAAA,QACf;AACA,YAAI,CAAC,KAAK,OAAO,KAAK,cAAc,YAAY;AAC9C,eAAK,KAAK,KAAK,MAAM;AACrB,eAAK,OAAO,IAAI,IAAI;AACpB,kBAAQ,EAAE,UAAU,gBAAgB,CAAC;AAAA,QACvC;AACA,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,SAAS,MAAM,MAAM,oBAAoB;AAC/C,cAAI,QAAQ;AACV,iBAAK,WAAW,OAAO,CAAC;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EACA,MAAM,UAAU;AACd,UAAM,MAA2B,CAAC;AAClC,UAAM,OAAO,KAAK,QAAQ,EAAE;AAC5B,QAAI,MAAM;AACR,UAAI,4BAA4B,IAAI;AAAA,IACtC;AACA,WAAO;AAAA,EACT;AAAA,EACA,MAAM,cAAc;AAClB,UAAM,OAAiB,CAAC;AACxB,UAAM,aAAa,KAAK,QAAQ,EAAE;AAClC,QAAI,YAAY;AACd,WAAK,KAAK,iBAAiB,UAAU;AAAA,IACvC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAyB,aAA0C;AACjE,UAAM,KAAK,WAAW;AACtB,QAAI,CAAC,KAAK,OAAO,GAAG;AAClB,aAAO;AAAA,IACT;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAES,OAAO;AACd,QAAI;AACF,YAAM,KAAK;AAAA,IACb,SAAS,OAAO;AACd,WAAK,KAAK,MAAM,KAAK;AAAA,IACvB,UAAE;AACA,WAAK,kBAAkB;AACvB,WAAK,OAAO,IAAI,KAAK;AAAA,IACvB;AAAA,EACF;AAAA,EACA;AAAA,EAEA,MAAM,aAAa,SAEhB;AACD,UAAM,KAAK,KAAK;AAChB,SAAK,oBAAoB,MAAM;AAC/B,UAAM,KAAK,IAAI,gBAAgB;AAC/B,SAAK,qBAAqB;AAC1B,QAAI;AACF,YAAM,KAAK;AAAA,QACT;AAAA,UACE,QAAQ;AAAA,UACR,SAAS,KAAK,QAAQ,EAAE;AAAA,UACxB,UAAU,cAAc;AAAA,QAC1B;AAAA,QACA;AAAA,UACE,QAAQ,KAAK,QAAQ,EAAE;AAAA,UACvB,iBAAiB,SAAS;AAAA,UAC1B,UAAU;AAAA,UACV,QAAQ,GAAG;AAAA,QACb;AAAA,MACF;AAAA,IACF,SAAS,OAAO;AACd,UAAI,GAAG,OAAO,SAAS;AACrB;AAAA,MACF;AACA,YAAM;AAAA,IACR;AAAA,EACF;AACF;",
6
+ "names": ["computed", "inject", "inject", "computed"]
7
+ }
@@ -0,0 +1,6 @@
1
/**
 * Queue that accepts single inputs and is backed by a function working on
 * arrays of inputs.
 * NOTE(review): when and how inputs are grouped into a batch is not visible
 * in this declaration — confirm against the implementation.
 */
+ export declare class BatchQueue<INPUT, RETURN> {
2
+ #private;
3
/** @param fn Receives an array of inputs and resolves to an array of results. */
+ constructor(fn: (input: INPUT[]) => Promise<RETURN[]>);
4
/** Adds one input; resolves to a single `RETURN` value. */
+ push(value: INPUT): Promise<RETURN>;
5
/**
 * Takes a promise and resolves to its awaited value.
 * NOTE(review): presumably also triggers processing of the queued inputs — confirm.
 */
+ then<T>(promise: Promise<T>): Promise<Awaited<T>>;
6
+ }
@@ -0,0 +1,10 @@
1
+ import { queueAsPromised } from 'fastq';
2
+ /**
 * Implements chunk-like invocation.
 * NOTE(review): presumably buffers pushed items and hands them to `queue` as
 * arrays sized by `count` — confirm against the implementation.
 */
3
+ export declare class CacheQueue<Data> {
4
+ #private;
5
/** fastq promise queue whose tasks are arrays of `Data`. */
+ queue: queueAsPromised<Data[], any>;
6
+ count: number;
7
+ constructor(queue: queueAsPromised<Data[], any>, count: number);
8
/** Adds a single item. */
+ push(data: Data): void;
9
/** NOTE(review): presumably flushes any remaining buffered items — confirm. */
+ complete(): void;
10
+ }
@@ -0,0 +1 @@
1
/**
 * Returns a value of the same type as `data`.
 * NOTE(review): presumably a deep copy, judging by the name — the cloning
 * strategy is not visible in this declaration.
 */
+ export declare function deepClone<T>(data: T): T;
@@ -0,0 +1,3 @@
1
+ import { Text2Vec } from './type';
2
/** Embeds one string for the given collection and resolves to a single vector. */
+ export type Text2VecItem = (value: string, collectionName: string) => Promise<number[]>;
3
/**
 * Calls `fn` with a single-string embedding function and resolves to what `fn` returns.
 * NOTE(review): presumably the `Text2VecItem` passed to `fn` is backed by `t2v`
 * (possibly grouping calls) — confirm against the implementation.
 */
+ export declare function runInEmbeddingContext<RETUREN>(fn: (t2v: Text2VecItem) => RETUREN, t2v: Text2Vec): Promise<RETUREN>;
@@ -0,0 +1,2 @@
1
/**
 * Fixed UUID string constant.
 * NOTE(review): presumably the namespace for name-based UUIDs produced by `getHash` — confirm.
 */
+ export declare const UUID_NS = "4c394ecc-764e-46ea-a770-f21a7c10aee1";
2
/** Maps `content` to a string hash. NOTE(review): the algorithm is not visible in this declaration. */
+ export declare function getHash(content: string): string;
@@ -0,0 +1,5 @@
1
/**
 * Converts an HTML string to text.
 *
 * @param html HTML source.
 * @param config `assetFolder` is required; `useOcr` and `ocrFn` are optional.
 *   NOTE(review): presumably `ocrFn` is applied to images (resolved against
 *   `assetFolder`) when `useOcr` is set — confirm against the implementation.
 * @returns Promise of the resulting text.
 */
+ export declare function html2Text(html: string, config: {
2
+ assetFolder: string;
3
+ useOcr?: boolean;
4
+ ocrFn?: (data: string) => Promise<string>;
5
+ }): Promise<string>;
@@ -0,0 +1,10 @@
1
+ export * from './promise';
2
+ export * from './html-to-text';
3
+ export * from './is-truthy';
4
+ export * from './cache-queue';
5
+ export * from './batch-queue';
6
+ export * from './clone';
7
+ export * from './embedding-queue';
8
+ export * from './log.service';
9
+ export * from './get-hash';
10
+ export * from './uniq-object-key';
@@ -0,0 +1 @@
1
/**
 * Type guard that narrows `value` to `T`, excluding `undefined`, `null` and `false`.
 * NOTE(review): whether other falsy values (`0`, `''`) pass at runtime is not
 * visible in this declaration.
 */
+ export declare function isTruthy<T>(value?: T | undefined | null | false): value is T;
@@ -0,0 +1,6 @@
1
+ import { InjectionToken } from 'static-injector';
2
/** Injection token for a logger object exposing `info`, `warn` and `error`. */
+ export declare const LogToken: InjectionToken<{
3
+ info: (...args: any) => void;
4
+ warn: (...args: any) => void;
5
+ error: (...args: any) => void;
6
+ }>;
@@ -0,0 +1,5 @@
1
/**
 * Creates a promise together with the functions that settle it.
 *
 * @returns `promise`, plus the `resolve` and `reject` functions for that promise.
 */
+ export declare function withResolvers<T = void>(): {
2
+ promise: Promise<T>;
3
+ resolve: (value: T | PromiseLike<T>) => void;
4
+ reject: (reason?: any) => void;
5
+ };
package/util/type.d.ts ADDED
@@ -0,0 +1 @@
1
/**
 * Embedding function type: a single string resolves to one vector (`number[]`),
 * an array of strings resolves to an array of vectors (`number[][]`).
 */
+ export type Text2Vec = <T extends string | string[]>(value: T, collectionName: string) => Promise<T extends string ? number[] : number[][]>;
@@ -0,0 +1 @@
1
/**
 * Derives a string key from an object.
 * NOTE(review): how the key is computed (and what makes it unique) is not
 * visible in this declaration.
 */
+ export declare function getUniqueObjectKey(object: Record<string, any>): string;
package/util.mjs ADDED
@@ -0,0 +1,219 @@
1
+ // packages/util/promise.ts
2
+ function withResolvers() {
3
+ let resolve;
4
+ let reject;
5
+ const promise = new Promise((res, rej) => {
6
+ resolve = res;
7
+ reject = rej;
8
+ });
9
+ return { promise, resolve, reject };
10
+ }
11
+
12
+ // packages/util/html-to-text/index.ts
13
+ import { htmlToText } from "html-to-text";
14
+ import { innerText } from "domutils";
15
+ import { v4 } from "uuid";
16
+ import MS from "magic-string";
17
+ import { path } from "@cyia/vfs2";
18
+ function _html2Text(text, useOcr) {
19
+ const options = {
20
+ formatters: {
21
+ anchor: (elem, walk, builder, formatOptions) => {
22
+ const a = innerText(elem);
23
+ builder.addInline(a, { noWordTransform: true });
24
+ }
25
+ }
26
+ };
27
+ const imageObject = {};
28
+ if (useOcr) {
29
+ options.formatters["image"] = (elem, walk, builder, formatOptions) => {
30
+ const src = elem.attribs?.src;
31
+ if (!src) {
32
+ return;
33
+ }
34
+ const id = v4();
35
+ imageObject[id] = src;
36
+ builder.addInline(`__{{${id}}}__`, { noWordTransform: true });
37
+ };
38
+ } else {
39
+ options.formatters["image"] = (elem, walk, builder, formatOptions) => {
40
+ };
41
+ }
42
+ return {
43
+ text: htmlToText(text, options),
44
+ imageObject
45
+ };
46
+ }
47
+ async function ocrImage(imageId2Path, assetFolder, ocrFn) {
48
+ const obj = {};
49
+ for (const key in imageId2Path) {
50
+ const filePath = imageId2Path[key];
51
+ obj[key] = await ocrFn(
52
+ filePath.startsWith(`data:image/`) ? filePath : path.join(assetFolder, filePath)
53
+ );
54
+ }
55
+ return obj;
56
+ }
57
/**
 * Replaces every `__{{<36-char id>}}__` placeholder in `text` with the text
 * recorded for that id.
 *
 * @param text         text containing image placeholders.
 * @param imageId2Text map of placeholder id -> replacement text.
 * @returns `text` unchanged when the map is empty; otherwise the text with every
 *   placeholder substituted (ids missing from the map, or mapped to a falsy
 *   value, become the empty string).
 */
function replaceImage(text, imageId2Text) {
  if (!Object.keys(imageId2Text).length) {
    return text;
  }
  // Braces are escaped so the pattern does not depend on lenient parsing of a
  // bare `{{`. A replacer function is used so `$` sequences in OCR output are
  // inserted literally.
  return text.replace(
    /__\{\{([0-9a-z-]{36})\}\}__/g,
    (_placeholder, imageId) => imageId2Text[imageId] || ""
  );
}
70
+ async function html2Text(html, config) {
71
+ const { text, imageObject } = _html2Text(html, config.useOcr);
72
+ if (config.useOcr) {
73
+ const obj = await ocrImage(imageObject, config.assetFolder, config.ocrFn);
74
+ return replaceImage(text, obj);
75
+ }
76
+ return text;
77
+ }
78
+
79
+ // packages/util/is-truthy.ts
80
/**
 * Reports whether `value` is truthy.
 *
 * @param value any value.
 * @returns `true` for truthy values, `false` for every falsy one
 *   (`undefined`, `null`, `false`, `0`, `""`, `NaN`).
 */
function isTruthy(value) {
  return Boolean(value);
}
83
+
84
+ // packages/util/cache-queue.ts
85
+ var CacheQueue = class {
86
+ constructor(queue, count) {
87
+ this.queue = queue;
88
+ this.count = count;
89
+ }
90
+ #list = [];
91
+ push(data) {
92
+ this.#list.push(data);
93
+ if (this.count === this.#list.length) {
94
+ this.queue.push(this.#list);
95
+ this.#list = [];
96
+ return;
97
+ }
98
+ }
99
+ complete() {
100
+ if (this.#list.length) {
101
+ this.queue.push(this.#list);
102
+ this.#list = [];
103
+ }
104
+ }
105
+ };
106
+
107
+ // packages/util/batch-queue.ts
108
+ import { promise as fastq } from "fastq";
109
+ var BatchQueue = class {
110
+ #list = [];
111
+ #fn;
112
+ constructor(fn) {
113
+ this.#fn = fastq(
114
+ (input) => fn(input.list).then((result) => {
115
+ input.resolve(result);
116
+ }),
117
+ 999999999
118
+ );
119
+ }
120
+ #p = withResolvers();
121
+ push(value) {
122
+ const p = this.#p;
123
+ const index = this.#list.length;
124
+ this.#list.push(value);
125
+ this.#delayEnd();
126
+ return p.promise.then((list) => list[index]);
127
+ }
128
+ #complete() {
129
+ if (this.#list.length) {
130
+ const p = this.#p;
131
+ this.#fn.push({ list: this.#list, resolve: p.resolve });
132
+ this.#p = withResolvers();
133
+ this.#list = [];
134
+ }
135
+ }
136
+ then(promise) {
137
+ return Promise.all([promise, this.#complete()]).then(([result]) => result);
138
+ }
139
+ #delayEndId;
140
+ #clearDelayEnd() {
141
+ if (typeof this.#delayEndId === "number") {
142
+ clearTimeout(this.#delayEndId);
143
+ this.#delayEndId = void 0;
144
+ }
145
+ }
146
+ #delayEnd() {
147
+ this.#clearDelayEnd();
148
+ this.#delayEndId = setTimeout(() => {
149
+ this.#end();
150
+ }, 20);
151
+ }
152
+ #end() {
153
+ if (this.#list.length) {
154
+ const p = this.#p;
155
+ this.#fn.push({ list: this.#list, resolve: p.resolve });
156
+ this.#p = withResolvers();
157
+ this.#list = [];
158
+ }
159
+ }
160
+ };
161
+
162
+ // packages/util/clone.ts
163
+ import rfdc from "rfdc";
164
+ var clone = rfdc();
165
+ function deepClone(data) {
166
+ return clone(data);
167
+ }
168
+
169
+ // packages/util/embedding-queue.ts
170
+ async function runInEmbeddingContext(fn, t2v) {
171
+ const map = /* @__PURE__ */ new Map();
172
+ const result = fn((text, collectionName) => {
173
+ let instance = map.get(collectionName);
174
+ if (!instance) {
175
+ instance = new BatchQueue((str) => t2v(str, collectionName));
176
+ map.set(collectionName, instance);
177
+ }
178
+ return instance.push(text);
179
+ });
180
+ return result;
181
+ }
182
+
183
+ // packages/util/log.service.ts
184
+ import { InjectionToken } from "static-injector";
185
// Injection token, created with the description "Log", used to look up the logger.
+ var LogToken = new InjectionToken("Log");
186
+
187
+ // packages/util/get-hash.ts
188
+ import { createHash } from "node:crypto";
189
+ import { v5 } from "uuid";
190
+ var UUID_NS = "4c394ecc-764e-46ea-a770-f21a7c10aee1";
191
+ function getHash(content) {
192
+ return v5(createHash("md5").update(content).digest("hex"), UUID_NS);
193
+ }
194
+
195
+ // packages/util/uniq-object-key.ts
196
/**
 * Serialises `object` to a canonical JSON string: the keys of every nested
 * non-array object are emitted in sorted order, so two objects with the same
 * content produce the same key regardless of property insertion order.
 *
 * @param object value to serialise.
 * @returns the canonical JSON string.
 */
function getUniqueObjectKey(object) {
  return JSON.stringify(object, replacer);
}

/**
 * JSON.stringify replacer that swaps each non-array object for a copy whose
 * keys are sorted. Arrays and primitives pass through unchanged.
 */
var replacer = (key, value) => {
  const kind = typeof value;
  // `typeof` (instead of `instanceof Object`) also covers objects without a
  // prototype, which would otherwise keep their insertion order. Functions
  // are still treated as objects, as before.
  const isContainer = (kind === "object" && value !== null || kind === "function") && !Array.isArray(value);
  if (!isContainer) {
    return value;
  }
  // Object.fromEntries defines own properties, so a key such as "__proto__"
  // is kept as data instead of being assigned as the copy's prototype.
  return Object.fromEntries(
    Object.keys(value).sort().map((key2) => [key2, value[key2]])
  );
};
206
+ export {
207
+ BatchQueue,
208
+ CacheQueue,
209
+ LogToken,
210
+ UUID_NS,
211
+ deepClone,
212
+ getHash,
213
+ getUniqueObjectKey,
214
+ html2Text,
215
+ isTruthy,
216
+ runInEmbeddingContext,
217
+ withResolvers
218
+ };
219
+ //# sourceMappingURL=util.mjs.map
package/util.mjs.map ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../packages/util/promise.ts", "../packages/util/html-to-text/index.ts", "../packages/util/is-truthy.ts", "../packages/util/cache-queue.ts", "../packages/util/batch-queue.ts", "../packages/util/clone.ts", "../packages/util/embedding-queue.ts", "../packages/util/log.service.ts", "../packages/util/get-hash.ts", "../packages/util/uniq-object-key.ts"],
4
+ "sourcesContent": ["export function withResolvers<T = void>() {\n let resolve: (value: T | PromiseLike<T>) => void;\n let reject: (reason?: any) => void;\n const promise = new Promise<T>((res, rej) => {\n resolve = res;\n reject = rej;\n });\n return { promise, resolve: resolve!, reject: reject! };\n}\n", "import { HtmlToTextOptions, htmlToText } from 'html-to-text';\nimport { innerText } from 'domutils';\nimport { v4 } from 'uuid';\nimport MS from 'magic-string';\nimport { path } from '@cyia/vfs2';\n\nfunction _html2Text(text: string, useOcr?: boolean) {\n const options: HtmlToTextOptions = {\n formatters: {\n anchor: (elem, walk, builder, formatOptions) => {\n const a = innerText(elem as any);\n builder.addInline(a, { noWordTransform: true });\n },\n },\n };\n const imageObject = {} as Record<string, string>;\n if (useOcr) {\n options.formatters!['image'] = (elem, walk, builder, formatOptions) => {\n const src = elem.attribs?.src as string | undefined;\n if (!src) {\n return;\n }\n const id = v4();\n imageObject[id] = src;\n builder.addInline(`__{{${id}}}__`, { noWordTransform: true });\n };\n } else {\n options.formatters!['image'] = (elem, walk, builder, formatOptions) => {};\n }\n return {\n text: htmlToText(text, options),\n imageObject,\n };\n}\nasync function ocrImage(\n imageId2Path: Record<string, string>,\n assetFolder: string,\n ocrFn: (str: string) => Promise<string>,\n) {\n const obj: Record<string, string> = {};\n for (const key in imageId2Path) {\n const filePath = imageId2Path[key];\n obj[key] = await ocrFn(\n filePath.startsWith(`data:image/`)\n ? 
filePath\n : path.join(assetFolder, filePath),\n );\n }\n return obj;\n}\nfunction replaceImage(text: string, imageId2Text: Record<string, string>) {\n const reg = /__{{([0-9a-z\\-]{36})}}__/dg;\n if (!Object.keys(imageId2Text).length) {\n return text;\n }\n let match: RegExpExecArray | null;\n const ms = new MS(text);\n\n while ((match = reg.exec(text))) {\n const range = match.indices![0];\n ms.update(range[0], range[1], imageId2Text[match[1]] || '');\n }\n return ms.toString();\n}\nexport async function html2Text(\n html: string,\n config: {\n assetFolder: string;\n useOcr?: boolean;\n ocrFn?: (data: string) => Promise<string>;\n },\n) {\n const { text, imageObject } = _html2Text(html, config.useOcr);\n if (config.useOcr) {\n const obj = await ocrImage(imageObject, config.assetFolder, config.ocrFn!);\n return replaceImage(text, obj);\n }\n return text;\n}\n", "export function isTruthy<T>(value?: T | undefined | null | false): value is T {\n return !!value;\n}\n", "import { queueAsPromised } from 'fastq';\n/** 实现类似chunk 调用 */\nexport class CacheQueue<Data> {\n #list: Data[] = [];\n constructor(\n public queue: queueAsPromised<Data[], any>,\n public count: number,\n ) {}\n push(data: Data) {\n this.#list.push(data);\n if (this.count === this.#list.length) {\n this.queue.push(this.#list);\n this.#list = [];\n return;\n }\n }\n complete() {\n if (this.#list.length) {\n this.queue.push(this.#list);\n this.#list = [];\n }\n }\n}\n", "import { withResolvers } from './promise';\nimport { promise as fastq } from 'fastq';\n\nexport class BatchQueue<INPUT, RETURN> {\n #list: INPUT[] = [];\n #fn;\n\n constructor(fn: (input: INPUT[]) => Promise<RETURN[]>) {\n this.#fn = fastq(\n (input: {\n list: INPUT[];\n resolve: (value: RETURN[] | PromiseLike<RETURN[]>) => void;\n }) =>\n fn(input.list).then((result) => {\n input.resolve(result);\n }),\n 999999999,\n );\n }\n #p = withResolvers<RETURN[]>();\n\n push(value: INPUT) {\n const p = this.#p;\n const index = 
this.#list.length;\n this.#list.push(value);\n this.#delayEnd();\n return p.promise.then((list) => list[index]);\n }\n\n #complete() {\n if (this.#list.length) {\n const p = this.#p;\n this.#fn.push({ list: this.#list, resolve: p.resolve });\n this.#p = withResolvers();\n this.#list = [];\n }\n }\n then<T>(promise: Promise<T>) {\n return Promise.all([promise, this.#complete()]).then(([result]) => result);\n }\n #delayEndId: any;\n\n #clearDelayEnd() {\n if (typeof this.#delayEndId === 'number') {\n clearTimeout(this.#delayEndId);\n this.#delayEndId = undefined;\n }\n }\n #delayEnd() {\n this.#clearDelayEnd();\n this.#delayEndId = setTimeout(() => {\n this.#end();\n }, 20);\n }\n #end() {\n if (this.#list.length) {\n const p = this.#p;\n this.#fn.push({ list: this.#list, resolve: p.resolve });\n this.#p = withResolvers();\n this.#list = [];\n }\n }\n}\n", "import rfdc from 'rfdc';\nconst clone = rfdc();\nexport function deepClone<T>(data: T): T {\n return clone(data);\n}\n", "import { BatchQueue } from './batch-queue';\nimport { Text2Vec } from './type';\nexport type Text2VecItem = (\n value: string,\n collectionName: string,\n) => Promise<number[]>;\nexport async function runInEmbeddingContext<RETUREN>(\n fn: (t2v: Text2VecItem) => RETUREN,\n t2v: Text2Vec,\n) {\n const map = new Map<string, BatchQueue<string, number[]>>();\n const result = fn((text: string, collectionName: string) => {\n let instance = map.get(collectionName);\n if (!instance) {\n instance = new BatchQueue((str) => t2v(str, collectionName));\n map.set(collectionName, instance);\n }\n return instance.push(text);\n });\n\n return result;\n}\n", "import { InjectionToken } from 'static-injector';\n\nexport const LogToken = new InjectionToken<{\n info: (...args: any) => void;\n warn: (...args: any) => void;\n error: (...args: any) => void;\n}>('Log');\n", "import { createHash } from 'node:crypto';\nimport { v5 } from 'uuid';\n// 用于id生成,修复直接用md5生成的hash没有`-`\nexport const UUID_NS = 
'4c394ecc-764e-46ea-a770-f21a7c10aee1';\n\nexport function getHash(content: string) {\n return v5(createHash('md5').update(content).digest('hex'), UUID_NS);\n}\n", "export function getUniqueObjectKey(object: Record<string, any>) {\n return JSON.stringify(object, replacer);\n}\n\nconst replacer = (key: string, value: any) =>\n value instanceof Object && !Array.isArray(value)\n ? Object.keys(value)\n .sort()\n .reduce(\n (sorted, key) => {\n sorted[key] = value[key];\n return sorted;\n },\n {} as Record<string, any>,\n )\n : value;\n"],
5
+ "mappings": ";AAAO,SAAS,gBAA0B;AACxC,MAAI;AACJ,MAAI;AACJ,QAAM,UAAU,IAAI,QAAW,CAAC,KAAK,QAAQ;AAC3C,cAAU;AACV,aAAS;AAAA,EACX,CAAC;AACD,SAAO,EAAE,SAAS,SAAmB,OAAgB;AACvD;;;ACRA,SAA4B,kBAAkB;AAC9C,SAAS,iBAAiB;AAC1B,SAAS,UAAU;AACnB,OAAO,QAAQ;AACf,SAAS,YAAY;AAErB,SAAS,WAAW,MAAc,QAAkB;AAClD,QAAM,UAA6B;AAAA,IACjC,YAAY;AAAA,MACV,QAAQ,CAAC,MAAM,MAAM,SAAS,kBAAkB;AAC9C,cAAM,IAAI,UAAU,IAAW;AAC/B,gBAAQ,UAAU,GAAG,EAAE,iBAAiB,KAAK,CAAC;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AACA,QAAM,cAAc,CAAC;AACrB,MAAI,QAAQ;AACV,YAAQ,WAAY,OAAO,IAAI,CAAC,MAAM,MAAM,SAAS,kBAAkB;AACrE,YAAM,MAAM,KAAK,SAAS;AAC1B,UAAI,CAAC,KAAK;AACR;AAAA,MACF;AACA,YAAM,KAAK,GAAG;AACd,kBAAY,EAAE,IAAI;AAClB,cAAQ,UAAU,OAAO,EAAE,QAAQ,EAAE,iBAAiB,KAAK,CAAC;AAAA,IAC9D;AAAA,EACF,OAAO;AACL,YAAQ,WAAY,OAAO,IAAI,CAAC,MAAM,MAAM,SAAS,kBAAkB;AAAA,IAAC;AAAA,EAC1E;AACA,SAAO;AAAA,IACL,MAAM,WAAW,MAAM,OAAO;AAAA,IAC9B;AAAA,EACF;AACF;AACA,eAAe,SACb,cACA,aACA,OACA;AACA,QAAM,MAA8B,CAAC;AACrC,aAAW,OAAO,cAAc;AAC9B,UAAM,WAAW,aAAa,GAAG;AACjC,QAAI,GAAG,IAAI,MAAM;AAAA,MACf,SAAS,WAAW,aAAa,IAC7B,WACA,KAAK,KAAK,aAAa,QAAQ;AAAA,IACrC;AAAA,EACF;AACA,SAAO;AACT;AACA,SAAS,aAAa,MAAc,cAAsC;AACxE,QAAM,MAAM;AACZ,MAAI,CAAC,OAAO,KAAK,YAAY,EAAE,QAAQ;AACrC,WAAO;AAAA,EACT;AACA,MAAI;AACJ,QAAM,KAAK,IAAI,GAAG,IAAI;AAEtB,SAAQ,QAAQ,IAAI,KAAK,IAAI,GAAI;AAC/B,UAAM,QAAQ,MAAM,QAAS,CAAC;AAC9B,OAAG,OAAO,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,aAAa,MAAM,CAAC,CAAC,KAAK,EAAE;AAAA,EAC5D;AACA,SAAO,GAAG,SAAS;AACrB;AACA,eAAsB,UACpB,MACA,QAKA;AACA,QAAM,EAAE,MAAM,YAAY,IAAI,WAAW,MAAM,OAAO,MAAM;AAC5D,MAAI,OAAO,QAAQ;AACjB,UAAM,MAAM,MAAM,SAAS,aAAa,OAAO,aAAa,OAAO,KAAM;AACzE,WAAO,aAAa,MAAM,GAAG;AAAA,EAC/B;AACA,SAAO;AACT;;;AC9EO,SAAS,SAAY,OAAkD;AAC5E,SAAO,CAAC,CAAC;AACX;;;ACAO,IAAM,aAAN,MAAuB;AAAA,EAE5B,YACS,OACA,OACP;AAFO;AACA;AAAA,EACN;AAAA,EAJH,QAAgB,CAAC;AAAA,EAKjB,KAAK,MAAY;AACf,SAAK,MAAM,KAAK,IAAI;AACpB,QAAI,KAAK,UAAU,KAAK,MAAM,QAAQ;AACpC,WAAK,MAAM,KAAK,KAAK,KAAK;AAC1B,WAAK,QAAQ,CAAC;AACd;AAAA,IACF;AAAA,EACF;AAAA,EACA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,WAAK,MAAM,KAAK,KAAK,KAAK;AAC1B,WAAK,QAAQ,CAAC;AAAA
,IAChB;AAAA,EACF;AACF;;;ACrBA,SAAS,WAAW,aAAa;AAE1B,IAAM,aAAN,MAAgC;AAAA,EACrC,QAAiB,CAAC;AAAA,EAClB;AAAA,EAEA,YAAY,IAA2C;AACrD,SAAK,MAAM;AAAA,MACT,CAAC,UAIC,GAAG,MAAM,IAAI,EAAE,KAAK,CAAC,WAAW;AAC9B,cAAM,QAAQ,MAAM;AAAA,MACtB,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAAA,EACA,KAAK,cAAwB;AAAA,EAE7B,KAAK,OAAc;AACjB,UAAM,IAAI,KAAK;AACf,UAAM,QAAQ,KAAK,MAAM;AACzB,SAAK,MAAM,KAAK,KAAK;AACrB,SAAK,UAAU;AACf,WAAO,EAAE,QAAQ,KAAK,CAAC,SAAS,KAAK,KAAK,CAAC;AAAA,EAC7C;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,KAAK;AACf,WAAK,IAAI,KAAK,EAAE,MAAM,KAAK,OAAO,SAAS,EAAE,QAAQ,CAAC;AACtD,WAAK,KAAK,cAAc;AACxB,WAAK,QAAQ,CAAC;AAAA,IAChB;AAAA,EACF;AAAA,EACA,KAAQ,SAAqB;AAC3B,WAAO,QAAQ,IAAI,CAAC,SAAS,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,MAAM,MAAM,MAAM;AAAA,EAC3E;AAAA,EACA;AAAA,EAEA,iBAAiB;AACf,QAAI,OAAO,KAAK,gBAAgB,UAAU;AACxC,mBAAa,KAAK,WAAW;AAC7B,WAAK,cAAc;AAAA,IACrB;AAAA,EACF;AAAA,EACA,YAAY;AACV,SAAK,eAAe;AACpB,SAAK,cAAc,WAAW,MAAM;AAClC,WAAK,KAAK;AAAA,IACZ,GAAG,EAAE;AAAA,EACP;AAAA,EACA,OAAO;AACL,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,KAAK;AACf,WAAK,IAAI,KAAK,EAAE,MAAM,KAAK,OAAO,SAAS,EAAE,QAAQ,CAAC;AACtD,WAAK,KAAK,cAAc;AACxB,WAAK,QAAQ,CAAC;AAAA,IAChB;AAAA,EACF;AACF;;;AC9DA,OAAO,UAAU;AACjB,IAAM,QAAQ,KAAK;AACZ,SAAS,UAAa,MAAY;AACvC,SAAO,MAAM,IAAI;AACnB;;;ACEA,eAAsB,sBACpB,IACA,KACA;AACA,QAAM,MAAM,oBAAI,IAA0C;AAC1D,QAAM,SAAS,GAAG,CAAC,MAAc,mBAA2B;AAC1D,QAAI,WAAW,IAAI,IAAI,cAAc;AACrC,QAAI,CAAC,UAAU;AACb,iBAAW,IAAI,WAAW,CAAC,QAAQ,IAAI,KAAK,cAAc,CAAC;AAC3D,UAAI,IAAI,gBAAgB,QAAQ;AAAA,IAClC;AACA,WAAO,SAAS,KAAK,IAAI;AAAA,EAC3B,CAAC;AAED,SAAO;AACT;;;ACrBA,SAAS,sBAAsB;AAExB,IAAM,WAAW,IAAI,eAIzB,KAAK;;;ACNR,SAAS,kBAAkB;AAC3B,SAAS,UAAU;AAEZ,IAAM,UAAU;AAEhB,SAAS,QAAQ,SAAiB;AACvC,SAAO,GAAG,WAAW,KAAK,EAAE,OAAO,OAAO,EAAE,OAAO,KAAK,GAAG,OAAO;AACpE;;;ACPO,SAAS,mBAAmB,QAA6B;AAC9D,SAAO,KAAK,UAAU,QAAQ,QAAQ;AACxC;AAEA,IAAM,WAAW,CAAC,KAAa,UAC7B,iBAAiB,UAAU,CAAC,MAAM,QAAQ,KAAK,IAC3C,OAAO,KAAK,KAAK,EACd,KAAK,EACL;AAAA,EACC,CAAC,QAAQA,SAAQ;AACf,WAAOA,IAAG,IAAI,MAAMA,IAAG;AACvB,WAAO;AAAA,EACT;AAAA,EACA,CAAC;AACH,IACF
;",
6
+ "names": ["key"]
7
+ }
@@ -0,0 +1,28 @@
1
+ import fs from 'fs';
2
+ import { InitOptions } from './set-transformers-config';
3
/**
 * Async store keyed by a request string: `match` resolves to the stored
 * ArrayBuffer or `undefined`, `put` stores one.
 * NOTE(review): presumably `request` is a URL and `undefined` means a cache miss — confirm against the implementation.
 */
+ export interface NodeProxy {
4
+ match: (request: string) => Promise<ArrayBuffer | undefined>;
5
+ put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;
6
+ }
7
/**
 * Cache with `match` / `put` methods keyed by a request string, constructed from `InitOptions`.
 * `match` resolves to a `FileResponse` or `undefined`; `put` accepts a `Response` or a `FileResponse`.
 * NOTE(review): looks like a file-backed cache for downloaded model files — the implementation is not part of this declaration; confirm.
 */
+ export declare class FileProxyCache {
8
+ #private;
9
+ constructor(initOptions: InitOptions);
10
+ match(request: string): Promise<FileResponse | undefined>;
11
+ put(request: string, response: Response | FileResponse): Promise<void>;
12
+ }
13
/**
 * Response-like object constructed from a file path. It declares `headers`, `status`,
 * `statusText`, a `body` typed as `fs.ReadStream`, an `exists` flag, and the readers
 * `arrayBuffer()`, `blob()`, `text()` and `json()`.
 * NOTE(review): presumably `exists`/`status` reflect whether the file is present on disk and
 * `updateContentType()` derives a header from the file extension — not visible here; confirm.
 */
+ declare class FileResponse {
14
+ filePath: string;
15
+ headers: import("undici-types").Headers;
16
+ exists: boolean;
17
+ status: number;
18
+ statusText: string;
19
+ body: fs.ReadStream;
20
+ constructor(filePath: string);
21
+ updateContentType(): void;
22
+ clone(): FileResponse;
23
+ arrayBuffer(): Promise<ArrayBuffer>;
24
+ blob(): Promise<Blob>;
25
+ text(): Promise<string>;
26
+ json(): Promise<object>;
27
+ }
28
+ export {};