@shenghuabi/knowledge 1.0.23 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/worker/custom-cache.d.ts +2 -19
- package/worker/reranker.mjs +15 -47
- package/worker/reranker.mjs.map +2 -2
- package/worker/text2vec.mjs +15 -47
- package/worker/text2vec.mjs.map +2 -2
package/package.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@shenghuabi/knowledge",
|
|
3
|
-
"version": "1.0.23",
|
|
3
|
+
"version": "1.0.24",
|
|
4
4
|
"description": "知识库",
|
|
5
5
|
"author": "wszgrcy",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"peerDependencies": {
|
|
8
|
-
"@cyia/vfs2": "^1.5.
|
|
8
|
+
"@cyia/vfs2": "^1.5.3",
|
|
9
9
|
"handlebars": "^4.7.8",
|
|
10
10
|
"lru-cache": "^11.2.1",
|
|
11
11
|
"rfdc": "^1.4.1",
|
|
@@ -19,14 +19,14 @@
|
|
|
19
19
|
"html-to-text": "^9.0.5",
|
|
20
20
|
"fastq": "^1.19.1",
|
|
21
21
|
"sharp": "0.34.2",
|
|
22
|
-
"@cyia/dl": "^1.5.
|
|
23
|
-
"@cyia/external-call": "^1.5.
|
|
22
|
+
"@cyia/dl": "^1.5.3",
|
|
23
|
+
"@cyia/external-call": "^1.5.3"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"html-entities": "^2.6.0",
|
|
27
27
|
"@qdrant/qdrant-js": "1.15.1",
|
|
28
28
|
"@cyia/mdict-reader": "^1.0.9",
|
|
29
|
-
"@langchain/community": "
|
|
29
|
+
"@langchain/community": "1.1.1",
|
|
30
30
|
"@langchain/core": "1.1.8",
|
|
31
31
|
"@langchain/textsplitters": "^1.0.1",
|
|
32
32
|
"@xhmikosr/decompress-tarbz2": "^8.0.2",
|
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"@gutenye/ocr-common": "^1.4.8",
|
|
48
48
|
"bmp-js": "^0.1.0",
|
|
49
49
|
"onnxruntime-node": "1.20.1",
|
|
50
|
-
"@huggingface/transformers": "
|
|
50
|
+
"@huggingface/transformers": "4.2.0",
|
|
51
51
|
"xlsx": "^0.18.5",
|
|
52
52
|
"pdfjs-dist": "^5.4.449"
|
|
53
53
|
},
|
package/worker/custom-cache.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
1
|
import { InitOptions } from './set-transformers-config';
|
|
3
2
|
export interface NodeProxy {
|
|
4
3
|
match: (request: string) => Promise<ArrayBuffer | undefined>;
|
|
@@ -7,22 +6,6 @@ export interface NodeProxy {
|
|
|
7
6
|
export declare class FileProxyCache {
|
|
8
7
|
#private;
|
|
9
8
|
constructor(initOptions: InitOptions);
|
|
10
|
-
match(request: string): Promise<
|
|
11
|
-
put(request: string, response: Response
|
|
9
|
+
match(request: string): Promise<Response | undefined>;
|
|
10
|
+
put(request: string, response: Response): Promise<void>;
|
|
12
11
|
}
|
|
13
|
-
declare class FileResponse {
|
|
14
|
-
filePath: string;
|
|
15
|
-
headers: import("undici-types").Headers;
|
|
16
|
-
exists: boolean;
|
|
17
|
-
status: number;
|
|
18
|
-
statusText: string;
|
|
19
|
-
body: fs.ReadStream;
|
|
20
|
-
constructor(filePath: string);
|
|
21
|
-
updateContentType(): void;
|
|
22
|
-
clone(): FileResponse;
|
|
23
|
-
arrayBuffer(): Promise<ArrayBuffer>;
|
|
24
|
-
blob(): Promise<Blob>;
|
|
25
|
-
text(): Promise<string>;
|
|
26
|
-
json(): Promise<object>;
|
|
27
|
-
}
|
|
28
|
-
export {};
|
package/worker/reranker.mjs
CHANGED
|
@@ -29,6 +29,20 @@ var FileProxyCache = class {
|
|
|
29
29
|
this.#downloadConfig = initOptions.downloadConfig;
|
|
30
30
|
this.#initOptions = initOptions;
|
|
31
31
|
}
|
|
32
|
+
async #createResponse(filePath) {
|
|
33
|
+
const stats = await fs.promises.stat(filePath);
|
|
34
|
+
const extension = filePath.split(".").pop().toLowerCase();
|
|
35
|
+
const contentType = CONTENT_TYPE_MAP[extension] ?? "application/octet-stream";
|
|
36
|
+
const stream = fs.createReadStream(filePath);
|
|
37
|
+
return new Response(stream, {
|
|
38
|
+
status: 200,
|
|
39
|
+
statusText: "OK",
|
|
40
|
+
headers: {
|
|
41
|
+
"content-type": contentType,
|
|
42
|
+
"content-length": stats.size.toString()
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
}
|
|
32
46
|
async match(request) {
|
|
33
47
|
let filePath;
|
|
34
48
|
if (request.startsWith("http")) {
|
|
@@ -54,7 +68,7 @@ var FileProxyCache = class {
|
|
|
54
68
|
}
|
|
55
69
|
const exists = await this.#vfs.exists(filePath);
|
|
56
70
|
if (exists) {
|
|
57
|
-
return new FileResponse(filePath);
|
|
71
|
+
return this.#createResponse(filePath);
|
|
58
72
|
}
|
|
59
73
|
return void 0;
|
|
60
74
|
}
|
|
@@ -62,7 +76,6 @@ var FileProxyCache = class {
|
|
|
62
76
|
throw new Error("no put");
|
|
63
77
|
}
|
|
64
78
|
};
|
|
65
|
-
var decoder = new TextDecoder("utf-8");
|
|
66
79
|
var CONTENT_TYPE_MAP = {
|
|
67
80
|
txt: "text/plain",
|
|
68
81
|
html: "text/html",
|
|
@@ -74,51 +87,6 @@ var CONTENT_TYPE_MAP = {
|
|
|
74
87
|
jpeg: "image/jpeg",
|
|
75
88
|
gif: "image/gif"
|
|
76
89
|
};
|
|
77
|
-
var FileResponse = class _FileResponse {
|
|
78
|
-
filePath;
|
|
79
|
-
headers;
|
|
80
|
-
exists = true;
|
|
81
|
-
status = 200;
|
|
82
|
-
statusText = "OK";
|
|
83
|
-
body;
|
|
84
|
-
constructor(filePath) {
|
|
85
|
-
this.filePath = filePath;
|
|
86
|
-
this.headers = new Headers();
|
|
87
|
-
this.updateContentType();
|
|
88
|
-
this.body = fs.createReadStream(filePath);
|
|
89
|
-
}
|
|
90
|
-
updateContentType() {
|
|
91
|
-
const stats = fs.statSync(this.filePath);
|
|
92
|
-
this.headers.set("content-length", stats.size.toString());
|
|
93
|
-
const extension = this.filePath.toString().split(".").pop().toLowerCase();
|
|
94
|
-
this.headers.set(
|
|
95
|
-
"content-type",
|
|
96
|
-
CONTENT_TYPE_MAP[extension] ?? "application/octet-stream"
|
|
97
|
-
);
|
|
98
|
-
}
|
|
99
|
-
clone() {
|
|
100
|
-
const response = new _FileResponse(this.filePath);
|
|
101
|
-
response.exists = this.exists;
|
|
102
|
-
response.status = this.status;
|
|
103
|
-
response.statusText = this.statusText;
|
|
104
|
-
response.headers = new Headers(this.headers);
|
|
105
|
-
return response;
|
|
106
|
-
}
|
|
107
|
-
async arrayBuffer() {
|
|
108
|
-
return fs.promises.readFile(this.filePath).then((buffer) => buffer.buffer);
|
|
109
|
-
}
|
|
110
|
-
async blob() {
|
|
111
|
-
return new Blob([await this.arrayBuffer()], {
|
|
112
|
-
type: this.headers.get("content-type")
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
async text() {
|
|
116
|
-
return decoder.decode(await this.arrayBuffer());
|
|
117
|
-
}
|
|
118
|
-
async json() {
|
|
119
|
-
return JSON.parse(await this.text());
|
|
120
|
-
}
|
|
121
|
-
};
|
|
122
90
|
|
|
123
91
|
// packages/worker/set-transformers-config.ts
|
|
124
92
|
function setTransformersConfig(options) {
|
package/worker/reranker.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/reranker/index.ts", "../../packages/worker/set-transformers-config.ts", "../../packages/worker/custom-cache.ts"],
|
|
4
|
-
"sourcesContent": ["import {\n AutoModelForSequenceClassification,\n AutoTokenizer,\n} from '@huggingface/transformers';\nimport type {\n Tensor,\n XLMRobertaModel,\n XLMRobertaTokenizer,\n} from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\n\nclass ReRanderService {\n init = async (options: InitOptions) => {\n if (!this.model || !this.tokenizer) {\n await this.#downloadOnly(options);\n }\n return true;\n };\n convert = async (input: { value: string; docs: string[] }) => {\n const inputs = this.tokenizer!(\n new Array(input.docs.length).fill(input.value),\n {\n text_pair: input.docs,\n padding: true,\n truncation: true,\n },\n );\n const { logits } = await this.model!(inputs);\n return (logits as Tensor)\n .sigmoid()\n .tolist()\n .map(([score], i: number) => ({\n index: i,\n score,\n }))\n .sort((a, b) => b.score - a.score);\n };\n tokenizer?: XLMRobertaTokenizer;\n model?: XLMRobertaModel;\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n this.tokenizer = await AutoTokenizer.from_pretrained(options.modelName);\n this.model = await AutoModelForSequenceClassification.from_pretrained(\n options.modelName,\n {\n ...options.options,\n } as any,\n );\n }\n}\nconst instance = new ReRanderService();\nconst init = instance.init;\nconst convert = instance.convert;\nexport { init, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function 
setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async match(request: string): Promise<
|
|
5
|
-
"mappings": ";AAAA;AAAA,EACE;AAAA,EACA;AAAA,OACK;;;ACHP,SAAS,OAAAA,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,MAAM,
|
|
4
|
+
"sourcesContent": ["import {\n AutoModelForSequenceClassification,\n AutoTokenizer,\n} from '@huggingface/transformers';\nimport type {\n Tensor,\n XLMRobertaModel,\n XLMRobertaTokenizer,\n} from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\n\nclass ReRanderService {\n init = async (options: InitOptions) => {\n if (!this.model || !this.tokenizer) {\n await this.#downloadOnly(options);\n }\n return true;\n };\n convert = async (input: { value: string; docs: string[] }) => {\n const inputs = this.tokenizer!(\n new Array(input.docs.length).fill(input.value),\n {\n text_pair: input.docs,\n padding: true,\n truncation: true,\n },\n );\n const { logits } = await this.model!(inputs);\n return (logits as Tensor)\n .sigmoid()\n .tolist()\n .map(([score], i: number) => ({\n index: i,\n score,\n }))\n .sort((a, b) => b.score - a.score);\n };\n tokenizer?: XLMRobertaTokenizer;\n model?: XLMRobertaModel;\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n this.tokenizer = await AutoTokenizer.from_pretrained(options.modelName);\n this.model = await AutoModelForSequenceClassification.from_pretrained(\n options.modelName,\n {\n ...options.options,\n } as any,\n );\n }\n}\nconst instance = new ReRanderService();\nconst init = instance.init;\nconst convert = instance.convert;\nexport { init, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function 
setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async #createResponse(filePath: string): Promise<Response> {\n const stats = await fs.promises.stat(filePath);\n const extension = filePath.split('.').pop()!.toLowerCase();\n const contentType =\n (CONTENT_TYPE_MAP as any)[extension] ?? 
'application/octet-stream';\n const stream = fs.createReadStream(filePath);\n return new Response(stream, {\n status: 200,\n statusText: 'OK',\n headers: {\n 'content-type': contentType,\n 'content-length': stats.size.toString(),\n },\n });\n }\n async match(request: string): Promise<Response | undefined> {\n let filePath;\n if (request.startsWith('http')) {\n const data = new URL(request);\n filePath = path.join(\n this.#path,\n data.pathname.replace(\n '/' +\n env.remotePathTemplate\n .replaceAll('{model}', this.#modelName)\n .replaceAll('{revision}', encodeURIComponent('main')),\n `/${this.#modelName}/`,\n ),\n );\n\n await downloadFile(request, {\n ...this.#downloadConfig,\n savePath: filePath,\n message: this.#sendMessage,\n headers: {\n token: this.#initOptions?.hfToken ?? '',\n 'software-bbs': 'bbs.shenghuabi.site',\n },\n });\n } else {\n filePath = request;\n }\n const exists = await this.#vfs.exists(filePath);\n if (exists) {\n return this.#createResponse(filePath);\n }\n return undefined;\n }\n\n async put(request: string, response: Response): Promise<void> {\n throw new Error('no put');\n }\n}\nconst CONTENT_TYPE_MAP: Record<string, string> = {\n txt: 'text/plain',\n html: 'text/html',\n css: 'text/css',\n js: 'text/javascript',\n json: 'application/json',\n png: 'image/png',\n jpg: 'image/jpeg',\n jpeg: 'image/jpeg',\n gif: 'image/gif',\n};\n"],
|
|
5
|
+
"mappings": ";AAAA;AAAA,EACE;AAAA,EACA;AAAA,OACK;;;ACHP,SAAS,OAAAA,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,gBAAgB,UAAqC;AACzD,UAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,QAAQ;AAC7C,UAAM,YAAY,SAAS,MAAM,GAAG,EAAE,IAAI,EAAG,YAAY;AACzD,UAAM,cACH,iBAAyB,SAAS,KAAK;AAC1C,UAAM,SAAS,GAAG,iBAAiB,QAAQ;AAC3C,WAAO,IAAI,SAAS,QAAQ;AAAA,MAC1B,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,kBAAkB,MAAM,KAAK,SAAS;AAAA,MACxC;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACA,MAAM,MAAM,SAAgD;AAC1D,QAAI;AACJ,QAAI,QAAQ,WAAW,MAAM,GAAG;AAC9B,YAAM,OAAO,IAAI,IAAI,OAAO;AAC5B,iBAAW,KAAK;AAAA,QACd,KAAK;AAAA,QACL,KAAK,SAAS;AAAA,UACZ,MACE,IAAI,mBACD,WAAW,WAAW,KAAK,UAAU,EACrC,WAAW,cAAc,mBAAmB,MAAM,CAAC;AAAA,UACxD,IAAI,KAAK,UAAU;AAAA,QACrB;AAAA,MACF;AAEA,YAAM,aAAa,SAAS;AAAA,QAC1B,GAAG,KAAK;AAAA,QACR,UAAU;AAAA,QACV,SAAS,KAAK;AAAA,QACd,SAAS;AAAA,UACP,OAAO,KAAK,cAAc,WAAW;AAAA,UACrC,gBAAgB;AAAA,QAClB;AAAA,MACF,CAAC;AAAA,IACH,OAAO;AACL,iBAAW;AAAA,IACb;AACA,UAAM,SAAS,MAAM,KAAK,KAAK,OAAO,QAAQ;AAC9C,QAAI,QAAQ;AACV,aAAO,KAAK,gBAAgB,QAAQ;AAAA,IACtC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,IAAI,SAAiB,UAAmC;AAC5D,UAAM,IAAI,MAAM,QAAQ;AAAA,EAC1B;AACF;AACA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AACP;;;ADvEO,SAAS,sBAAsB,SAAsB;AAC1D,EAAAC,KAAI,QAAQ;AACZ,EAAAA,KAAI,iBAAiB,QAAQ;AAC7B,EAAAA,KAAI,mBAAmB;AACvB,EAAAA,KAAI,oBAAoB;AACxB,EAAAA,KAAI,WAAW,QAAQ;AACvB,EAAAA,KAAI,cAAc,IAAI,eAAe,OAAO;AAC5C,EAAAA,KAAI,kBAAkB;AACtB,EAAAA,KAAI,aAAa;AACjB,EAAAA,KAAI,iBAAiB;AAErB,EAAAA,KAAI,aAAa,WAAW,QAAQ,UAAU;AAChD;;;ADlBA,IAAM,kBAAN,MAAsB;AAAA,EACpB,OAAO,OAAO,YAAyB;AACr
C,QAAI,CAAC,KAAK,SAAS,CAAC,KAAK,WAAW;AAClC,YAAM,KAAK,cAAc,OAAO;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EACA,UAAU,OAAO,UAA6C;AAC5D,UAAM,SAAS,KAAK;AAAA,MAClB,IAAI,MAAM,MAAM,KAAK,MAAM,EAAE,KAAK,MAAM,KAAK;AAAA,MAC7C;AAAA,QACE,WAAW,MAAM;AAAA,QACjB,SAAS;AAAA,QACT,YAAY;AAAA,MACd;AAAA,IACF;AACA,UAAM,EAAE,OAAO,IAAI,MAAM,KAAK,MAAO,MAAM;AAC3C,WAAQ,OACL,QAAQ,EACR,OAAO,EACP,IAAI,CAAC,CAAC,KAAK,GAAG,OAAe;AAAA,MAC5B,OAAO;AAAA,MACP;AAAA,IACF,EAAE,EACD,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EACA,MAAM,cAAc,SAAsB;AACxC,0BAAsB,OAAO;AAE7B,SAAK,YAAY,MAAM,cAAc,gBAAgB,QAAQ,SAAS;AACtE,SAAK,QAAQ,MAAM,mCAAmC;AAAA,MACpD,QAAQ;AAAA,MACR;AAAA,QACE,GAAG,QAAQ;AAAA,MACb;AAAA,IACF;AAAA,EACF;AACF;AACA,IAAM,WAAW,IAAI,gBAAgB;AACrC,IAAM,OAAO,SAAS;AACtB,IAAM,UAAU,SAAS;",
|
|
6
6
|
"names": ["env", "env"]
|
|
7
7
|
}
|
package/worker/text2vec.mjs
CHANGED
|
@@ -26,6 +26,20 @@ var FileProxyCache = class {
|
|
|
26
26
|
this.#downloadConfig = initOptions.downloadConfig;
|
|
27
27
|
this.#initOptions = initOptions;
|
|
28
28
|
}
|
|
29
|
+
async #createResponse(filePath) {
|
|
30
|
+
const stats = await fs.promises.stat(filePath);
|
|
31
|
+
const extension = filePath.split(".").pop().toLowerCase();
|
|
32
|
+
const contentType = CONTENT_TYPE_MAP[extension] ?? "application/octet-stream";
|
|
33
|
+
const stream = fs.createReadStream(filePath);
|
|
34
|
+
return new Response(stream, {
|
|
35
|
+
status: 200,
|
|
36
|
+
statusText: "OK",
|
|
37
|
+
headers: {
|
|
38
|
+
"content-type": contentType,
|
|
39
|
+
"content-length": stats.size.toString()
|
|
40
|
+
}
|
|
41
|
+
});
|
|
42
|
+
}
|
|
29
43
|
async match(request) {
|
|
30
44
|
let filePath;
|
|
31
45
|
if (request.startsWith("http")) {
|
|
@@ -51,7 +65,7 @@ var FileProxyCache = class {
|
|
|
51
65
|
}
|
|
52
66
|
const exists = await this.#vfs.exists(filePath);
|
|
53
67
|
if (exists) {
|
|
54
|
-
return new FileResponse(filePath);
|
|
68
|
+
return this.#createResponse(filePath);
|
|
55
69
|
}
|
|
56
70
|
return void 0;
|
|
57
71
|
}
|
|
@@ -59,7 +73,6 @@ var FileProxyCache = class {
|
|
|
59
73
|
throw new Error("no put");
|
|
60
74
|
}
|
|
61
75
|
};
|
|
62
|
-
var decoder = new TextDecoder("utf-8");
|
|
63
76
|
var CONTENT_TYPE_MAP = {
|
|
64
77
|
txt: "text/plain",
|
|
65
78
|
html: "text/html",
|
|
@@ -71,51 +84,6 @@ var CONTENT_TYPE_MAP = {
|
|
|
71
84
|
jpeg: "image/jpeg",
|
|
72
85
|
gif: "image/gif"
|
|
73
86
|
};
|
|
74
|
-
var FileResponse = class _FileResponse {
|
|
75
|
-
filePath;
|
|
76
|
-
headers;
|
|
77
|
-
exists = true;
|
|
78
|
-
status = 200;
|
|
79
|
-
statusText = "OK";
|
|
80
|
-
body;
|
|
81
|
-
constructor(filePath) {
|
|
82
|
-
this.filePath = filePath;
|
|
83
|
-
this.headers = new Headers();
|
|
84
|
-
this.updateContentType();
|
|
85
|
-
this.body = fs.createReadStream(filePath);
|
|
86
|
-
}
|
|
87
|
-
updateContentType() {
|
|
88
|
-
const stats = fs.statSync(this.filePath);
|
|
89
|
-
this.headers.set("content-length", stats.size.toString());
|
|
90
|
-
const extension = this.filePath.toString().split(".").pop().toLowerCase();
|
|
91
|
-
this.headers.set(
|
|
92
|
-
"content-type",
|
|
93
|
-
CONTENT_TYPE_MAP[extension] ?? "application/octet-stream"
|
|
94
|
-
);
|
|
95
|
-
}
|
|
96
|
-
clone() {
|
|
97
|
-
const response = new _FileResponse(this.filePath);
|
|
98
|
-
response.exists = this.exists;
|
|
99
|
-
response.status = this.status;
|
|
100
|
-
response.statusText = this.statusText;
|
|
101
|
-
response.headers = new Headers(this.headers);
|
|
102
|
-
return response;
|
|
103
|
-
}
|
|
104
|
-
async arrayBuffer() {
|
|
105
|
-
return fs.promises.readFile(this.filePath).then((buffer) => buffer.buffer);
|
|
106
|
-
}
|
|
107
|
-
async blob() {
|
|
108
|
-
return new Blob([await this.arrayBuffer()], {
|
|
109
|
-
type: this.headers.get("content-type")
|
|
110
|
-
});
|
|
111
|
-
}
|
|
112
|
-
async text() {
|
|
113
|
-
return decoder.decode(await this.arrayBuffer());
|
|
114
|
-
}
|
|
115
|
-
async json() {
|
|
116
|
-
return JSON.parse(await this.text());
|
|
117
|
-
}
|
|
118
|
-
};
|
|
119
87
|
|
|
120
88
|
// packages/worker/set-transformers-config.ts
|
|
121
89
|
function setTransformersConfig(options) {
|
package/worker/text2vec.mjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../packages/worker/text2vec/index.ts", "../../packages/worker/set-transformers-config.ts", "../../packages/worker/custom-cache.ts"],
|
|
4
|
-
"sourcesContent": ["import { pipeline } from '@huggingface/transformers';\nimport type { FeatureExtractionPipeline } from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\nfunction qwen3ToVec(\n extractor: FeatureExtractionPipeline,\n query: string[],\n description = 'Given a web search query, retrieve relevant passages that answer the query',\n) {\n return extractor!(\n query.map((item) => `Instruct: ${description}\\nQuery:${item}`),\n {\n pooling: 'last_token',\n normalize: true,\n },\n );\n}\n\nclass Text2VecService {\n #extractor!: FeatureExtractionPipeline | undefined;\n constructor() {}\n init = async (options: InitOptions) => {\n if (this.#extractor) {\n return true;\n }\n this.#extractor = await this.#downloadOnly(options);\n return !!this.#extractor;\n };\n convert = async (\n input: {\n value: string | string[];\n mode?: 'qwen3';\n taskDescription?: string;\n } & InitOptions,\n ) => {\n if (!this.#extractor) {\n await this.init(input);\n }\n const inputList =\n typeof input.value === 'string' ? [input.value] : input.value;\n let result;\n if (input.mode === 'qwen3') {\n result = qwen3ToVec(this.#extractor!, inputList, input.taskDescription);\n } else {\n result = this.#extractor!(inputList, {\n pooling: 'mean',\n normalize: true,\n });\n }\n return result.then((result) => {\n const list = result.tolist();\n return typeof input.value === 'string' ? 
list[0] : list;\n });\n };\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n return await pipeline(\n 'feature-extraction',\n options.modelName,\n options.options,\n );\n }\n getSize = () => (this.#extractor!.model.config as any).hidden_size;\n}\nconst instance = new Text2VecService();\nconst init = instance.init;\nconst getSize = instance.getSize;\nconst convert = instance.convert;\nexport { init, getSize, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = 
initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async match(request: string): Promise<
|
|
5
|
-
"mappings": ";AAAA,SAAS,YAAAA,iBAAgB;;;ACAzB,SAAS,OAAAC,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,MAAM,
|
|
4
|
+
"sourcesContent": ["import { pipeline } from '@huggingface/transformers';\nimport type { FeatureExtractionPipeline } from '@huggingface/transformers';\n\nimport { InitOptions, setTransformersConfig } from '../set-transformers-config';\nfunction qwen3ToVec(\n extractor: FeatureExtractionPipeline,\n query: string[],\n description = 'Given a web search query, retrieve relevant passages that answer the query',\n) {\n return extractor!(\n query.map((item) => `Instruct: ${description}\\nQuery:${item}`),\n {\n pooling: 'last_token',\n normalize: true,\n },\n );\n}\n\nclass Text2VecService {\n #extractor!: FeatureExtractionPipeline | undefined;\n constructor() {}\n init = async (options: InitOptions) => {\n if (this.#extractor) {\n return true;\n }\n this.#extractor = await this.#downloadOnly(options);\n return !!this.#extractor;\n };\n convert = async (\n input: {\n value: string | string[];\n mode?: 'qwen3';\n taskDescription?: string;\n } & InitOptions,\n ) => {\n if (!this.#extractor) {\n await this.init(input);\n }\n const inputList =\n typeof input.value === 'string' ? [input.value] : input.value;\n let result;\n if (input.mode === 'qwen3') {\n result = qwen3ToVec(this.#extractor!, inputList, input.taskDescription);\n } else {\n result = this.#extractor!(inputList, {\n pooling: 'mean',\n normalize: true,\n });\n }\n return result.then((result) => {\n const list = result.tolist();\n return typeof input.value === 'string' ? 
list[0] : list;\n });\n };\n async #downloadOnly(options: InitOptions) {\n setTransformersConfig(options);\n\n return await pipeline(\n 'feature-extraction',\n options.modelName,\n options.options,\n );\n }\n getSize = () => (this.#extractor!.model.config as any).hidden_size;\n}\nconst instance = new Text2VecService();\nconst init = instance.init;\nconst getSize = instance.getSize;\nconst convert = instance.convert;\nexport { init, getSize, convert };\n", "import { env, pipeline } from '@huggingface/transformers';\nimport { MessagePort } from 'worker_threads';\nimport { FileProxyCache } from './custom-cache';\nimport type { DownloadConfigType } from '@cyia/external-call';\ntype PipeLineOptions = Partial<NonNullable<Parameters<typeof pipeline>[2]>>;\nexport interface InitOptions {\n /** 文件夹 */\n dir: string;\n /** 模型 */\n modelName: string;\n /** 模型参数 */\n options: PipeLineOptions;\n /**直接链接 */\n remoteHost: string;\n downloadConfig?: DownloadConfigType;\n port?: MessagePort;\n hfToken?: string;\n}\nexport function setTransformersConfig(options: InitOptions) {\n env.useFS = false;\n env.localModelPath = options.dir;\n env.allowLocalModels = false;\n env.allowRemoteModels = true;\n env.cacheDir = options.dir;\n env.customCache = new FileProxyCache(options);\n env.useBrowserCache = false;\n env.useFSCache = true;\n env.useCustomCache = true;\n\n env.remoteHost = `https://${options.remoteHost}`;\n}\n", "import { env } from '@huggingface/transformers';\nimport { createNormalizeVfs, path } from '@cyia/vfs2';\nimport { downloadFile } from '@cyia/dl';\nimport fs from 'fs';\nimport { InitOptions } from './set-transformers-config';\nexport interface NodeProxy {\n match: (request: string) => Promise<ArrayBuffer | undefined>;\n put: (request: string, arraybuffer: ArrayBuffer) => Promise<void>;\n}\nexport class FileProxyCache {\n #path;\n #vfs;\n #sendMessage;\n #modelName;\n #downloadConfig;\n #initOptions;\n constructor(initOptions: InitOptions) {\n this.#modelName = 
initOptions.modelName;\n this.#sendMessage = (message: any) => {\n initOptions.port?.postMessage({ type: 'progress', message });\n };\n this.#path = initOptions.dir;\n this.#vfs = createNormalizeVfs({ dir: initOptions.dir });\n this.#downloadConfig = initOptions.downloadConfig;\n this.#initOptions = initOptions;\n }\n async #createResponse(filePath: string): Promise<Response> {\n const stats = await fs.promises.stat(filePath);\n const extension = filePath.split('.').pop()!.toLowerCase();\n const contentType =\n (CONTENT_TYPE_MAP as any)[extension] ?? 'application/octet-stream';\n const stream = fs.createReadStream(filePath);\n return new Response(stream, {\n status: 200,\n statusText: 'OK',\n headers: {\n 'content-type': contentType,\n 'content-length': stats.size.toString(),\n },\n });\n }\n async match(request: string): Promise<Response | undefined> {\n let filePath;\n if (request.startsWith('http')) {\n const data = new URL(request);\n filePath = path.join(\n this.#path,\n data.pathname.replace(\n '/' +\n env.remotePathTemplate\n .replaceAll('{model}', this.#modelName)\n .replaceAll('{revision}', encodeURIComponent('main')),\n `/${this.#modelName}/`,\n ),\n );\n\n await downloadFile(request, {\n ...this.#downloadConfig,\n savePath: filePath,\n message: this.#sendMessage,\n headers: {\n token: this.#initOptions?.hfToken ?? '',\n 'software-bbs': 'bbs.shenghuabi.site',\n },\n });\n } else {\n filePath = request;\n }\n const exists = await this.#vfs.exists(filePath);\n if (exists) {\n return this.#createResponse(filePath);\n }\n return undefined;\n }\n\n async put(request: string, response: Response): Promise<void> {\n throw new Error('no put');\n }\n}\nconst CONTENT_TYPE_MAP: Record<string, string> = {\n txt: 'text/plain',\n html: 'text/html',\n css: 'text/css',\n js: 'text/javascript',\n json: 'application/json',\n png: 'image/png',\n jpg: 'image/jpeg',\n jpeg: 'image/jpeg',\n gif: 'image/gif',\n};\n"],
|
|
5
|
+
"mappings": ";AAAA,SAAS,YAAAA,iBAAgB;;;ACAzB,SAAS,OAAAC,YAAqB;;;ACA9B,SAAS,WAAW;AACpB,SAAS,oBAAoB,YAAY;AACzC,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AAMR,IAAM,iBAAN,MAAqB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,aAA0B;AACpC,SAAK,aAAa,YAAY;AAC9B,SAAK,eAAe,CAAC,YAAiB;AACpC,kBAAY,MAAM,YAAY,EAAE,MAAM,YAAY,QAAQ,CAAC;AAAA,IAC7D;AACA,SAAK,QAAQ,YAAY;AACzB,SAAK,OAAO,mBAAmB,EAAE,KAAK,YAAY,IAAI,CAAC;AACvD,SAAK,kBAAkB,YAAY;AACnC,SAAK,eAAe;AAAA,EACtB;AAAA,EACA,MAAM,gBAAgB,UAAqC;AACzD,UAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,QAAQ;AAC7C,UAAM,YAAY,SAAS,MAAM,GAAG,EAAE,IAAI,EAAG,YAAY;AACzD,UAAM,cACH,iBAAyB,SAAS,KAAK;AAC1C,UAAM,SAAS,GAAG,iBAAiB,QAAQ;AAC3C,WAAO,IAAI,SAAS,QAAQ;AAAA,MAC1B,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,kBAAkB,MAAM,KAAK,SAAS;AAAA,MACxC;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EACA,MAAM,MAAM,SAAgD;AAC1D,QAAI;AACJ,QAAI,QAAQ,WAAW,MAAM,GAAG;AAC9B,YAAM,OAAO,IAAI,IAAI,OAAO;AAC5B,iBAAW,KAAK;AAAA,QACd,KAAK;AAAA,QACL,KAAK,SAAS;AAAA,UACZ,MACE,IAAI,mBACD,WAAW,WAAW,KAAK,UAAU,EACrC,WAAW,cAAc,mBAAmB,MAAM,CAAC;AAAA,UACxD,IAAI,KAAK,UAAU;AAAA,QACrB;AAAA,MACF;AAEA,YAAM,aAAa,SAAS;AAAA,QAC1B,GAAG,KAAK;AAAA,QACR,UAAU;AAAA,QACV,SAAS,KAAK;AAAA,QACd,SAAS;AAAA,UACP,OAAO,KAAK,cAAc,WAAW;AAAA,UACrC,gBAAgB;AAAA,QAClB;AAAA,MACF,CAAC;AAAA,IACH,OAAO;AACL,iBAAW;AAAA,IACb;AACA,UAAM,SAAS,MAAM,KAAK,KAAK,OAAO,QAAQ;AAC9C,QAAI,QAAQ;AACV,aAAO,KAAK,gBAAgB,QAAQ;AAAA,IACtC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,IAAI,SAAiB,UAAmC;AAC5D,UAAM,IAAI,MAAM,QAAQ;AAAA,EAC1B;AACF;AACA,IAAM,mBAA2C;AAAA,EAC/C,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AACP;;;ADvEO,SAAS,sBAAsB,SAAsB;AAC1D,EAAAC,KAAI,QAAQ;AACZ,EAAAA,KAAI,iBAAiB,QAAQ;AAC7B,EAAAA,KAAI,mBAAmB;AACvB,EAAAA,KAAI,oBAAoB;AACxB,EAAAA,KAAI,WAAW,QAAQ;AACvB,EAAAA,KAAI,cAAc,IAAI,eAAe,OAAO;AAC5C,EAAAA,KAAI,kBAAkB;AACtB,EAAAA,KAAI,aAAa;AACjB,EAAAA,KAAI,iBAAiB;AAErB,EAAAA,KAAI,aAAa,WAAW,QAAQ,UAAU;AAChD;;;AD1BA,SAAS,WACP,WACA,OACA,cAAc,8EACd;AACA,SAAO;AAAA,IACL,MAAM,IAA
I,CAAC,SAAS,aAAa,WAAW;AAAA,QAAW,IAAI,EAAE;AAAA,IAC7D;AAAA,MACE,SAAS;AAAA,MACT,WAAW;AAAA,IACb;AAAA,EACF;AACF;AAEA,IAAM,kBAAN,MAAsB;AAAA,EACpB;AAAA,EACA,cAAc;AAAA,EAAC;AAAA,EACf,OAAO,OAAO,YAAyB;AACrC,QAAI,KAAK,YAAY;AACnB,aAAO;AAAA,IACT;AACA,SAAK,aAAa,MAAM,KAAK,cAAc,OAAO;AAClD,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EACA,UAAU,OACR,UAKG;AACH,QAAI,CAAC,KAAK,YAAY;AACpB,YAAM,KAAK,KAAK,KAAK;AAAA,IACvB;AACA,UAAM,YACJ,OAAO,MAAM,UAAU,WAAW,CAAC,MAAM,KAAK,IAAI,MAAM;AAC1D,QAAI;AACJ,QAAI,MAAM,SAAS,SAAS;AAC1B,eAAS,WAAW,KAAK,YAAa,WAAW,MAAM,eAAe;AAAA,IACxE,OAAO;AACL,eAAS,KAAK,WAAY,WAAW;AAAA,QACnC,SAAS;AAAA,QACT,WAAW;AAAA,MACb,CAAC;AAAA,IACH;AACA,WAAO,OAAO,KAAK,CAACC,YAAW;AAC7B,YAAM,OAAOA,QAAO,OAAO;AAC3B,aAAO,OAAO,MAAM,UAAU,WAAW,KAAK,CAAC,IAAI;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EACA,MAAM,cAAc,SAAsB;AACxC,0BAAsB,OAAO;AAE7B,WAAO,MAAMC;AAAA,MACX;AAAA,MACA,QAAQ;AAAA,MACR,QAAQ;AAAA,IACV;AAAA,EACF;AAAA,EACA,UAAU,MAAO,KAAK,WAAY,MAAM,OAAe;AACzD;AACA,IAAM,WAAW,IAAI,gBAAgB;AACrC,IAAM,OAAO,SAAS;AACtB,IAAM,UAAU,SAAS;AACzB,IAAM,UAAU,SAAS;",
|
|
6
6
|
"names": ["pipeline", "env", "env", "result", "pipeline"]
|
|
7
7
|
}
|