qwen-embedder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +109 -0
- package/dist/index.js.map +1 -0
- package/package.json +37 -0
- package/src/embedder.ts +146 -0
- package/src/index.ts +2 -0
- package/src/types.ts +5 -0
package/README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# qwen-embedder
|
|
2
|
+
|
|
3
|
+
Local text embedding for Node.js. Runs a quantized Qwen ONNX model via [Transformers.js](https://github.com/huggingface/transformers.js) — no Python, no external services.
|
|
4
|
+
|
|
5
|
+
```ts
|
|
6
|
+
import { Embedder } from 'qwen-embedder'
|
|
7
|
+
|
|
8
|
+
const embedder = await Embedder.create()
|
|
9
|
+
|
|
10
|
+
const vector = await embedder.embed('hello world')
|
|
11
|
+
// => number[] (length 1024)
|
|
12
|
+
|
|
13
|
+
const batch = await embedder.embed(['cat', 'dog', 'fish'])
|
|
14
|
+
// => number[][] (3 vectors, each of length 1024)
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```sh
|
|
20
|
+
npm install qwen-embedder
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Node.js 20+ (ESM only).
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```ts
|
|
28
|
+
import { Embedder } from 'qwen-embedder'
|
|
29
|
+
|
|
30
|
+
// First call downloads the model (~600MB q8) to
|
|
31
|
+
// ~/.qwenembedder/.cache/models/ — subsequent runs use cache
|
|
32
|
+
const embedder = await Embedder.create()
|
|
33
|
+
|
|
34
|
+
// Single string → number[]
|
|
35
|
+
const vec = await embedder.embed('your text here')
|
|
36
|
+
console.log(vec.length) // 1024
|
|
37
|
+
|
|
38
|
+
// Batch → number[][]
|
|
39
|
+
const vecs = await embedder.embed(['first', 'second', 'third'])
|
|
40
|
+
|
|
41
|
+
// Repeated input hits the in-memory cache (FIFO, default 100 entries)
|
|
42
|
+
const again = await embedder.embed('your text here')
|
|
43
|
+
// same values as vec, returned instantly without inference
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Options
|
|
47
|
+
|
|
48
|
+
```ts
|
|
49
|
+
interface EmbedderOptions {
|
|
50
|
+
queue?: boolean // serialize inference calls (concurrency: 1); ignored when `concurrency` is set
|
|
51
|
+
concurrency?: number // max inference calls running at once; takes precedence over `queue`. With neither option set, calls are not queued
|
|
52
|
+
cacheSize?: number // in-memory result cache size (default 100, 0 to disable)
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
```ts
|
|
57
|
+
// Serial execution — safe for memory-constrained environments
|
|
58
|
+
const e = await Embedder.create({ queue: true })
|
|
59
|
+
|
|
60
|
+
// Limited parallelism
|
|
61
|
+
const limited = await Embedder.create({ concurrency: 2 })
|
|
62
|
+
|
|
63
|
+
// No result caching
|
|
64
|
+
const uncached = await Embedder.create({ cacheSize: 0 })
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Model
|
|
68
|
+
|
|
69
|
+
- Model: [`onnx-community/Qwen3-Embedding-0.6B-ONNX`](https://huggingface.co/onnx-community/Qwen3-Embedding-0.6B-ONNX)
|
|
70
|
+
- Quantization: `q8`
|
|
71
|
+
- Pipeline: `feature-extraction` with mean pooling + L2 normalization
|
|
72
|
+
- Output dimension: 1024
|
|
73
|
+
- Cache: `~/.qwenembedder/.cache/models/` (auto-downloaded on first use)
|
|
74
|
+
|
|
75
|
+
## Docs
|
|
76
|
+
|
|
77
|
+
- [Architecture](docs/architecture.md)
|
|
78
|
+
- [Technical spec](docs/technical-spec.md)
|
|
79
|
+
- [ADRs](docs/adr/)
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Options accepted by `Embedder.create`. Every field is optional.
 */
interface EmbedderOptions {
    /**
     * When truthy (and `concurrency` is not given), inference calls are
     * serialized through a queue with a concurrency of 1.
     */
    queue?: boolean;
    /**
     * Explicit limit on concurrent inference calls. Takes precedence over
     * `queue` whenever it is not `undefined`.
     */
    concurrency?: number;
    /**
     * Maximum number of entries kept in the in-memory result cache.
     * Defaults to 100; 0 disables caching.
     */
    cacheSize?: number;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Text embedder. Instances cannot be constructed directly (the constructor
 * is private); obtain one through the static `create` factory.
 */
declare class Embedder {
    private pipe;
    private cache;
    private maxCacheSize;
    private queue;
    private constructor();
    /**
     * Asynchronously builds an embedder.
     *
     * @param options - Optional queueing and caching settings.
     * @returns A promise for the ready-to-use embedder.
     */
    static create(options?: EmbedderOptions): Promise<Embedder>;
    /**
     * Embeds a single string.
     *
     * @returns A promise for one embedding vector.
     */
    embed(text: string): Promise<number[]>;
    /**
     * Embeds several strings at once.
     *
     * @returns A promise for one embedding vector per input, in input order.
     */
    embed(texts: string[]): Promise<number[][]>;
    private embedSingle;
    private embedBatch;
    private runInference;
    private setCache;
}
|
|
21
|
+
|
|
22
|
+
export { Embedder, type EmbedderOptions };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
// src/embedder.ts
|
|
2
|
+
import { pipeline } from "@huggingface/transformers";
|
|
3
|
+
import PQueue from "p-queue";
|
|
4
|
+
import { homedir } from "os";
|
|
5
|
+
import { join } from "path";
|
|
6
|
+
var MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX";
|
|
7
|
+
var CACHE_DIR = join(homedir(), ".qwenembedder", ".cache", "models");
|
|
8
|
+
/**
 * Derives the queue concurrency from the user options.
 *
 * An explicit `concurrency` always wins; otherwise a truthy `queue` means
 * "one at a time"; otherwise `null` signals that no queue should be used.
 */
function resolveConcurrency(options) {
  const explicit = options.concurrency;
  if (explicit === undefined) {
    return options.queue ? 1 : null;
  }
  return explicit;
}
|
|
13
|
+
var Embedder = class _Embedder {
|
|
14
|
+
pipe;
|
|
15
|
+
cache;
|
|
16
|
+
maxCacheSize;
|
|
17
|
+
queue;
|
|
18
|
+
constructor(pipe, concurrency, maxCacheSize) {
|
|
19
|
+
this.pipe = pipe;
|
|
20
|
+
this.maxCacheSize = maxCacheSize;
|
|
21
|
+
this.cache = /* @__PURE__ */ new Map();
|
|
22
|
+
this.queue = concurrency !== null ? new PQueue({ concurrency }) : null;
|
|
23
|
+
}
|
|
24
|
+
static async create(options = {}) {
|
|
25
|
+
const cacheSize = options.cacheSize ?? 100;
|
|
26
|
+
const concurrency = resolveConcurrency(options);
|
|
27
|
+
const pipe = await pipeline("feature-extraction", MODEL_ID, {
|
|
28
|
+
cache_dir: CACHE_DIR,
|
|
29
|
+
dtype: "q8"
|
|
30
|
+
});
|
|
31
|
+
return new _Embedder(pipe, concurrency, cacheSize);
|
|
32
|
+
}
|
|
33
|
+
async embed(input) {
|
|
34
|
+
if (Array.isArray(input)) {
|
|
35
|
+
return this.embedBatch(input);
|
|
36
|
+
}
|
|
37
|
+
return this.embedSingle(input);
|
|
38
|
+
}
|
|
39
|
+
async embedSingle(text) {
|
|
40
|
+
if (text.length === 0) {
|
|
41
|
+
throw new TypeError("Input must be a non-empty string");
|
|
42
|
+
}
|
|
43
|
+
const cached = this.cache.get(text);
|
|
44
|
+
if (cached !== void 0) {
|
|
45
|
+
return cached.slice();
|
|
46
|
+
}
|
|
47
|
+
const result = await this.runInference(text);
|
|
48
|
+
this.setCache(text, result);
|
|
49
|
+
return result.slice();
|
|
50
|
+
}
|
|
51
|
+
async embedBatch(texts) {
|
|
52
|
+
if (texts.length === 0) {
|
|
53
|
+
return [];
|
|
54
|
+
}
|
|
55
|
+
const results = new Array(texts.length);
|
|
56
|
+
const uncached = [];
|
|
57
|
+
for (let i = 0; i < texts.length; i++) {
|
|
58
|
+
const t = texts[i];
|
|
59
|
+
if (typeof t !== "string" || t.length === 0) {
|
|
60
|
+
throw new TypeError("Input must be a non-empty string");
|
|
61
|
+
}
|
|
62
|
+
const cached = this.cache.get(t);
|
|
63
|
+
if (cached !== void 0) {
|
|
64
|
+
results[i] = cached;
|
|
65
|
+
} else {
|
|
66
|
+
uncached.push(i);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
if (uncached.length === 0) {
|
|
70
|
+
return results;
|
|
71
|
+
}
|
|
72
|
+
const uncachedTexts = uncached.map((i) => texts[i]);
|
|
73
|
+
const inferred = await this.runInference(uncachedTexts);
|
|
74
|
+
for (let j = 0; j < uncached.length; j++) {
|
|
75
|
+
const vector = inferred[j];
|
|
76
|
+
this.setCache(uncachedTexts[j], vector);
|
|
77
|
+
results[uncached[j]] = vector;
|
|
78
|
+
}
|
|
79
|
+
return results;
|
|
80
|
+
}
|
|
81
|
+
async runInference(input) {
|
|
82
|
+
const exec = () => this.pipe(input, { pooling: "mean", normalize: true });
|
|
83
|
+
const output = this.queue ? await this.queue.add(exec) : await exec();
|
|
84
|
+
const data = Array.from(output.data);
|
|
85
|
+
const hiddenDim = output.dims[output.dims.length - 1];
|
|
86
|
+
if (typeof input === "string") {
|
|
87
|
+
return data;
|
|
88
|
+
}
|
|
89
|
+
const result = [];
|
|
90
|
+
for (let i = 0; i < data.length; i += hiddenDim) {
|
|
91
|
+
result.push(data.slice(i, i + hiddenDim));
|
|
92
|
+
}
|
|
93
|
+
return result;
|
|
94
|
+
}
|
|
95
|
+
setCache(key, value) {
|
|
96
|
+
if (this.maxCacheSize === 0) return;
|
|
97
|
+
if (this.cache.size >= this.maxCacheSize) {
|
|
98
|
+
const oldest = this.cache.keys().next();
|
|
99
|
+
if (!oldest.done) {
|
|
100
|
+
this.cache.delete(oldest.value);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
this.cache.set(key, value);
|
|
104
|
+
}
|
|
105
|
+
};
|
|
106
|
+
export {
|
|
107
|
+
Embedder
|
|
108
|
+
};
|
|
109
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/embedder.ts"],"sourcesContent":["import { pipeline } from '@huggingface/transformers'\nimport PQueue from 'p-queue'\nimport { homedir } from 'os'\nimport { join } from 'path'\nimport type { EmbedderOptions } from './types.js'\n\nconst MODEL_ID = 'onnx-community/Qwen3-Embedding-0.6B-ONNX'\nconst CACHE_DIR = join(homedir(), '.qwenembedder', '.cache', 'models')\n\nfunction resolveConcurrency(options: EmbedderOptions): number | null {\n if (options.concurrency !== undefined) return options.concurrency\n if (options.queue) return 1\n return null\n}\n\ninterface InferenceResult {\n data: Float32Array | number[]\n dims: number[]\n tolist(): number[][]\n}\n\nexport class Embedder {\n private pipe: (texts: string | string[], options?: Record<string, unknown>) => Promise<InferenceResult>\n private cache: Map<string, number[]>\n private maxCacheSize: number\n private queue: PQueue | null\n\n private constructor(\n pipe: Embedder['pipe'],\n concurrency: number | null,\n maxCacheSize: number,\n ) {\n this.pipe = pipe\n this.maxCacheSize = maxCacheSize\n this.cache = new Map()\n this.queue = concurrency !== null ? new PQueue({ concurrency }) : null\n }\n\n static async create(options: EmbedderOptions = {}): Promise<Embedder> {\n const cacheSize = options.cacheSize ?? 
100\n const concurrency = resolveConcurrency(options)\n\n const pipe = await pipeline('feature-extraction', MODEL_ID, {\n cache_dir: CACHE_DIR,\n dtype: 'q8',\n })\n\n return new Embedder(pipe as unknown as Embedder['pipe'], concurrency, cacheSize)\n }\n\n async embed(text: string): Promise<number[]>\n async embed(texts: string[]): Promise<number[][]>\n async embed(input: string | string[]): Promise<number[] | number[][]> {\n if (Array.isArray(input)) {\n return this.embedBatch(input)\n }\n return this.embedSingle(input)\n }\n\n private async embedSingle(text: string): Promise<number[]> {\n if (text.length === 0) {\n throw new TypeError('Input must be a non-empty string')\n }\n\n const cached = this.cache.get(text)\n if (cached !== undefined) {\n return cached.slice()\n }\n\n const result = await this.runInference(text)\n this.setCache(text, result)\n return result.slice()\n }\n\n private async embedBatch(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return []\n }\n\n const results: (number[] | undefined)[] = new Array(texts.length)\n const uncached: number[] = []\n\n for (let i = 0; i < texts.length; i++) {\n const t = texts[i]\n if (typeof t !== 'string' || t.length === 0) {\n throw new TypeError('Input must be a non-empty string')\n }\n const cached = this.cache.get(t)\n if (cached !== undefined) {\n results[i] = cached\n } else {\n uncached.push(i)\n }\n }\n\n if (uncached.length === 0) {\n return results as number[][]\n }\n\n const uncachedTexts = uncached.map(i => texts[i])\n const inferred = await this.runInference(uncachedTexts)\n\n for (let j = 0; j < uncached.length; j++) {\n const vector = inferred[j]\n this.setCache(uncachedTexts[j], vector)\n results[uncached[j]] = vector\n }\n\n return results as number[][]\n }\n\n private async runInference(text: string): Promise<number[]>\n private async runInference(texts: string[]): Promise<number[][]>\n private async runInference(input: string | string[]): Promise<number[] | number[][]> 
{\n const exec = () => this.pipe(input, { pooling: 'mean', normalize: true })\n\n const output = this.queue\n ? await this.queue.add(exec)\n : await exec()\n\n const data = Array.from(output.data) as number[]\n const hiddenDim = output.dims[output.dims.length - 1]\n\n if (typeof input === 'string') {\n return data\n }\n\n const result: number[][] = []\n for (let i = 0; i < data.length; i += hiddenDim) {\n result.push(data.slice(i, i + hiddenDim))\n }\n return result\n }\n\n private setCache(key: string, value: number[]): void {\n if (this.maxCacheSize === 0) return\n\n if (this.cache.size >= this.maxCacheSize) {\n const oldest = this.cache.keys().next()\n if (!oldest.done) {\n this.cache.delete(oldest.value)\n }\n }\n this.cache.set(key, value)\n }\n}\n"],"mappings":";AAAA,SAAS,gBAAgB;AACzB,OAAO,YAAY;AACnB,SAAS,eAAe;AACxB,SAAS,YAAY;AAGrB,IAAM,WAAW;AACjB,IAAM,YAAY,KAAK,QAAQ,GAAG,iBAAiB,UAAU,QAAQ;AAErE,SAAS,mBAAmB,SAAyC;AACnE,MAAI,QAAQ,gBAAgB,OAAW,QAAO,QAAQ;AACtD,MAAI,QAAQ,MAAO,QAAO;AAC1B,SAAO;AACT;AAQO,IAAM,WAAN,MAAM,UAAS;AAAA,EACZ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YACN,MACA,aACA,cACA;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,QAAQ,oBAAI,IAAI;AACrB,SAAK,QAAQ,gBAAgB,OAAO,IAAI,OAAO,EAAE,YAAY,CAAC,IAAI;AAAA,EACpE;AAAA,EAEA,aAAa,OAAO,UAA2B,CAAC,GAAsB;AACpE,UAAM,YAAY,QAAQ,aAAa;AACvC,UAAM,cAAc,mBAAmB,OAAO;AAE9C,UAAM,OAAO,MAAM,SAAS,sBAAsB,UAAU;AAAA,MAC1D,WAAW;AAAA,MACX,OAAO;AAAA,IACT,CAAC;AAED,WAAO,IAAI,UAAS,MAAqC,aAAa,SAAS;AAAA,EACjF;AAAA,EAIA,MAAM,MAAM,OAA0D;AACpE,QAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,aAAO,KAAK,WAAW,KAAK;AAAA,IAC9B;AACA,WAAO,KAAK,YAAY,KAAK;AAAA,EAC/B;AAAA,EAEA,MAAc,YAAY,MAAiC;AACzD,QAAI,KAAK,WAAW,GAAG;AACrB,YAAM,IAAI,UAAU,kCAAkC;AAAA,IACxD;AAEA,UAAM,SAAS,KAAK,MAAM,IAAI,IAAI;AAClC,QAAI,WAAW,QAAW;AACxB,aAAO,OAAO,MAAM;AAAA,IACtB;AAEA,UAAM,SAAS,MAAM,KAAK,aAAa,IAAI;AAC3C,SAAK,SAAS,MAAM,MAAM;AAC1B,WAAO,OAAO,MAAM;AAAA,EACtB;AAAA,EAEA,MAAc,WAAW,OAAsC;AAC7D,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,UAAoC,IAAI,MAAM,MAAM,MAAM;AAChE,UAAM,WAAqB,CAAC;AAE5B,a
AAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,YAAM,IAAI,MAAM,CAAC;AACjB,UAAI,OAAO,MAAM,YAAY,EAAE,WAAW,GAAG;AAC3C,cAAM,IAAI,UAAU,kCAAkC;AAAA,MACxD;AACA,YAAM,SAAS,KAAK,MAAM,IAAI,CAAC;AAC/B,UAAI,WAAW,QAAW;AACxB,gBAAQ,CAAC,IAAI;AAAA,MACf,OAAO;AACL,iBAAS,KAAK,CAAC;AAAA,MACjB;AAAA,IACF;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,IACT;AAEA,UAAM,gBAAgB,SAAS,IAAI,OAAK,MAAM,CAAC,CAAC;AAChD,UAAM,WAAW,MAAM,KAAK,aAAa,aAAa;AAEtD,aAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,YAAM,SAAS,SAAS,CAAC;AACzB,WAAK,SAAS,cAAc,CAAC,GAAG,MAAM;AACtC,cAAQ,SAAS,CAAC,CAAC,IAAI;AAAA,IACzB;AAEA,WAAO;AAAA,EACT;AAAA,EAIA,MAAc,aAAa,OAA0D;AACnF,UAAM,OAAO,MAAM,KAAK,KAAK,OAAO,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AAExE,UAAM,SAAS,KAAK,QAChB,MAAM,KAAK,MAAM,IAAI,IAAI,IACzB,MAAM,KAAK;AAEf,UAAM,OAAO,MAAM,KAAK,OAAO,IAAI;AACnC,UAAM,YAAY,OAAO,KAAK,OAAO,KAAK,SAAS,CAAC;AAEpD,QAAI,OAAO,UAAU,UAAU;AAC7B,aAAO;AAAA,IACT;AAEA,UAAM,SAAqB,CAAC;AAC5B,aAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,WAAW;AAC/C,aAAO,KAAK,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC;AAAA,IAC1C;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,SAAS,KAAa,OAAuB;AACnD,QAAI,KAAK,iBAAiB,EAAG;AAE7B,QAAI,KAAK,MAAM,QAAQ,KAAK,cAAc;AACxC,YAAM,SAAS,KAAK,MAAM,KAAK,EAAE,KAAK;AACtC,UAAI,CAAC,OAAO,MAAM;AAChB,aAAK,MAAM,OAAO,OAAO,KAAK;AAAA,MAChC;AAAA,IACF;AACA,SAAK,MAAM,IAAI,KAAK,KAAK;AAAA,EAC3B;AACF;","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "qwen-embedder",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Local text embedding with Qwen ONNX model via Transformers.js",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"src",
|
|
16
|
+
"dist"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "tsup",
|
|
20
|
+
"test": "vitest run",
|
|
21
|
+
"test:watch": "vitest"
|
|
22
|
+
},
|
|
23
|
+
"license": "MIT",
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"@huggingface/transformers": "^4.2.0",
|
|
26
|
+
"p-queue": "^9.3.0"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"@types/node": "^25.9.3",
|
|
30
|
+
"tsup": "^8.5.1",
|
|
31
|
+
"typescript": "^6.0.3",
|
|
32
|
+
"vitest": "^4.1.8"
|
|
33
|
+
},
|
|
34
|
+
"allowScripts": {
|
|
35
|
+
"onnxruntime-node@1.24.3": true
|
|
36
|
+
}
|
|
37
|
+
}
|
package/src/embedder.ts
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { pipeline } from '@huggingface/transformers'
|
|
2
|
+
import PQueue from 'p-queue'
|
|
3
|
+
import { homedir } from 'os'
|
|
4
|
+
import { join } from 'path'
|
|
5
|
+
import type { EmbedderOptions } from './types.js'
|
|
6
|
+
|
|
7
|
+
const MODEL_ID = 'onnx-community/Qwen3-Embedding-0.6B-ONNX'
|
|
8
|
+
const CACHE_DIR = join(homedir(), '.qwenembedder', '.cache', 'models')
|
|
9
|
+
|
|
10
|
+
/**
 * Derives the queue concurrency from the user options.
 *
 * An explicit `concurrency` always wins; otherwise a truthy `queue` means
 * "one at a time"; otherwise `null` signals that no queue should be used.
 */
function resolveConcurrency(options: EmbedderOptions): number | null {
  const explicit = options.concurrency
  if (explicit === undefined) {
    return options.queue ? 1 : null
  }
  return explicit
}
|
|
15
|
+
|
|
16
|
+
/**
 * The subset of the pipeline's output object that this module relies on.
 */
interface InferenceResult {
  /** Flat buffer of embedding values; read via `Array.from`. */
  data: Float32Array | number[]
  /** Output dimensions; the last entry is used as the per-vector length. */
  dims: number[]
  /** Declared for completeness; not called anywhere in the visible source. */
  tolist(): number[][]
}
|
|
21
|
+
|
|
22
|
+
/**
 * Text embedder backed by a Transformers.js `feature-extraction` pipeline.
 *
 * Instances are created with {@link Embedder.create}; the constructor is
 * private because loading the pipeline is asynchronous. Results are memoised
 * per input string in an in-memory FIFO cache, and inference calls can
 * optionally be routed through a `p-queue` to bound concurrency.
 */
export class Embedder {
  private pipe: (texts: string | string[], options?: Record<string, unknown>) => Promise<InferenceResult>
  private cache: Map<string, number[]>
  private maxCacheSize: number
  private queue: PQueue | null

  /**
   * @param pipe - Callable pipeline returning the raw inference output.
   * @param concurrency - Queue concurrency, or `null` to call the pipeline directly.
   * @param maxCacheSize - Maximum cache entries; values that are not > 0 disable caching.
   */
  private constructor(
    pipe: Embedder['pipe'],
    concurrency: number | null,
    maxCacheSize: number,
  ) {
    this.pipe = pipe
    this.maxCacheSize = maxCacheSize
    this.cache = new Map()
    this.queue = concurrency !== null ? new PQueue({ concurrency }) : null
  }

  /**
   * Loads the pipeline for `MODEL_ID` (dtype `q8`, cached under `CACHE_DIR`)
   * and wraps it in an embedder.
   *
   * @param options - Optional queueing and caching settings; `cacheSize` defaults to 100.
   * @returns A ready-to-use embedder.
   */
  static async create(options: EmbedderOptions = {}): Promise<Embedder> {
    const cacheSize = options.cacheSize ?? 100
    const concurrency = resolveConcurrency(options)

    const pipe = await pipeline('feature-extraction', MODEL_ID, {
      cache_dir: CACHE_DIR,
      dtype: 'q8',
    })

    // The library's pipeline type is narrowed to the call shape this class uses.
    return new Embedder(pipe as unknown as Embedder['pipe'], concurrency, cacheSize)
  }

  /**
   * Embeds one string (resolves to a vector) or an array of strings
   * (resolves to one vector per input, in input order).
   *
   * @throws TypeError if any input is not a non-empty string.
   */
  async embed(text: string): Promise<number[]>
  async embed(texts: string[]): Promise<number[][]>
  async embed(input: string | string[]): Promise<number[] | number[][]> {
    if (Array.isArray(input)) {
      return this.embedBatch(input)
    }
    return this.embedSingle(input)
  }

  /** Embeds a single string, serving from and filling the cache. */
  private async embedSingle(text: string): Promise<number[]> {
    // Same validation as the batch path, so non-string input from untyped
    // callers fails with a clear message.
    if (typeof text !== 'string' || text.length === 0) {
      throw new TypeError('Input must be a non-empty string')
    }

    const cached = this.cache.get(text)
    if (cached !== undefined) {
      // Hand out a copy so callers cannot mutate the cached vector.
      return cached.slice()
    }

    const result = await this.runInference(text)
    this.setCache(text, result)
    return result.slice()
  }

  /**
   * Embeds a batch. Cached texts are served from the cache; the remaining
   * distinct texts are inferred in a single pipeline call.
   */
  private async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return []
    }

    const results: (number[] | undefined)[] = new Array(texts.length)
    // text -> output positions waiting for it; duplicates are inferred only once.
    const pending = new Map<string, number[]>()

    for (let i = 0; i < texts.length; i++) {
      const t = texts[i]
      if (typeof t !== 'string' || t.length === 0) {
        throw new TypeError('Input must be a non-empty string')
      }
      const cached = this.cache.get(t)
      if (cached !== undefined) {
        // Copy, as embedSingle does: the cache must keep sole ownership of its arrays.
        results[i] = cached.slice()
        continue
      }
      const slots = pending.get(t)
      if (slots === undefined) {
        pending.set(t, [i])
      } else {
        slots.push(i)
      }
    }

    if (pending.size === 0) {
      return results as number[][]
    }

    const entries = Array.from(pending.entries())
    const inferred = await this.runInference(entries.map(([text]) => text))

    entries.forEach(([text, slots], j) => {
      const vector = inferred[j]
      this.setCache(text, vector)
      for (const slot of slots) {
        // Each output position gets its own copy; `vector` itself stays in the cache.
        results[slot] = vector.slice()
      }
    })

    return results as number[][]
  }

  /**
   * Runs the pipeline (through the queue when one is configured) and converts
   * the flat output buffer into plain number arrays.
   *
   * @throws Error if a batch output does not contain exactly one row per input.
   */
  private async runInference(text: string): Promise<number[]>
  private async runInference(texts: string[]): Promise<number[][]>
  private async runInference(input: string | string[]): Promise<number[] | number[][]> {
    // NOTE(review): the upstream Qwen3-Embedding model card appears to describe
    // last-token pooling; mean pooling is kept so existing vectors stay
    // comparable. Confirm which is intended.
    const exec = () => this.pipe(input, { pooling: 'mean', normalize: true })

    const output = this.queue
      ? await this.queue.add(exec)
      : await exec()

    const data = Array.from(output.data) as number[]

    if (typeof input === 'string') {
      return data
    }

    const hiddenDim = output.dims[output.dims.length - 1]
    // Guards the slicing loop: a zero/invalid step would never terminate, and a
    // row-count mismatch would pair vectors with the wrong texts.
    if (!(hiddenDim > 0) || data.length !== input.length * hiddenDim) {
      throw new Error('Unexpected embedding output shape')
    }

    const result: number[][] = []
    for (let i = 0; i < data.length; i += hiddenDim) {
      result.push(data.slice(i, i + hiddenDim))
    }
    return result
  }

  /** Stores a vector, evicting the oldest entry (FIFO) when a new key would overflow the cache. */
  private setCache(key: string, value: number[]): void {
    // Written as a negated `>` so that NaN and negative sizes disable caching as well.
    if (!(this.maxCacheSize > 0)) return

    // Overwriting an existing key does not grow the map, so nothing needs evicting.
    if (!this.cache.has(key) && this.cache.size >= this.maxCacheSize) {
      const oldest = this.cache.keys().next()
      if (!oldest.done) {
        this.cache.delete(oldest.value)
      }
    }
    this.cache.set(key, value)
  }
}
|
package/src/index.ts
ADDED