@vivantel/virage-embedder-fastembed 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,62 @@
1
+ # @vivantel/virage-embedder-fastembed
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@vivantel/virage-embedder-fastembed.svg)](https://www.npmjs.com/package/@vivantel/virage-embedder-fastembed)
4
+
5
+ Fast, local ONNX-based embedding provider for `@vivantel/virage-core` using [FastEmbed](https://github.com/Anush008/fastembed-js). No API key required.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @vivantel/virage-embedder-fastembed
11
+ ```
12
+
13
+ > **Note**: `fastembed` downloads ONNX Runtime binaries during install. If you're in a CI environment or want to skip GPU binaries: `npm install @vivantel/virage-embedder-fastembed --ignore-scripts` — the CPU runtime works fine without the postinstall scripts.
14
+
15
+ ## Usage
16
+
17
+ ```typescript
18
+ import { FastEmbedEmbedder } from "@vivantel/virage-embedder-fastembed";
19
+
20
+ const embedder = new FastEmbedEmbedder({
21
+ model: "BAAI/bge-small-en-v1.5",
22
+ dimensions: 384,
23
+ });
24
+ ```
25
+
26
+ The inner model is initialized lazily on first use — the first `embed()`, `embedBatch()`, or `healthCheck()` call.
27
+
28
+ ## JSON config
29
+
30
+ ```json
31
+ {
32
+ "embedder": {
33
+ "package": "@vivantel/virage-embedder-fastembed",
34
+ "config": {
35
+ "model": "BAAI/bge-small-en-v1.5",
36
+ "dimensions": 384
37
+ }
38
+ }
39
+ }
40
+ ```
41
+
42
+ ## Supported models
43
+
44
+ | Model | Dimensions | Notes |
45
+ | --- | --- | --- |
46
+ | `BAAI/bge-small-en-v1.5` | 384 | Default — fast and compact |
47
+ | `BAAI/bge-base-en-v1.5` | 768 | Better quality |
48
+ | `BAAI/bge-large-en-v1.5` | 1024 | Highest quality |
49
+ | `nomic-ai/nomic-embed-text-v1.5` | 768 | Long context (8192 tokens) |
50
+
51
+ ## Options
52
+
53
+ | Option | Type | Description |
54
+ | --- | --- | --- |
55
+ | `model` | `string` | Model name (default: `"fast-bge-small-en-v1.5"`) |
56
+ | `dimensions` | `number` | Output dimensions (looked up from a built-in per-model table if omitted; 384 for unrecognized models) |
57
+ | `cacheDir` | `string` | Local model cache directory |
58
+ | `showDownloadProgress` | `boolean` | Show download progress bar (default: `false`) |
59
+
60
+ ## License
61
+
62
+ MIT
@@ -0,0 +1,27 @@
1
+ import type { EmbeddingProvider, Logger } from "@vivantel/virage-core";
2
+ export interface FastEmbedEmbedderOptions { // Constructor options for FastEmbedEmbedder; every field is optional.
3
+ /** FastEmbed model name, e.g. "BAAI/bge-small-en-v1.5" */
4
+ model?: string;
5
+ /** Output vector dimensions. When omitted, taken from a built-in per-model table, else 384. */
6
+ dimensions?: number;
7
+ /** Local directory for caching downloaded models. */
8
+ cacheDir?: string;
9
+ showDownloadProgress?: boolean; // Forwarded to fastembed's init; treated as false when omitted.
10
+ }
11
+ export declare class FastEmbedEmbedder implements EmbeddingProvider { // Embedding provider that loads a fastembed model on first use.
12
+ readonly name = "fastembed";
13
+ readonly dimensions: number; // Explicit option, else a built-in per-model default, else 384.
14
+ readonly model: string; // Model name passed to fastembed; defaults to "fast-bge-small-en-v1.5".
15
+ readonly preferredBatchSize = 256;
16
+ private readonly cacheDir?;
17
+ private readonly showDownloadProgress;
18
+ private _inner; // Cached fastembed model instance; null until the first load completes.
19
+ private logger;
20
+ constructor(options?: FastEmbedEmbedderOptions);
21
+ setLogger(logger: Logger): void; // Stores logger.withTag("fastembed") for this provider's log output.
22
+ private getModel; // Lazily imports fastembed and initializes the model, caching the instance.
23
+ embed(text: string): Promise<number[]>; // Embeds one text by delegating to embedBatch([text]).
24
+ embedBatch(texts: string[]): Promise<number[][]>; // Embeds all texts, concatenating the batches yielded by the model.
25
+ healthCheck(): Promise<boolean>; // Resolves true if the model loads, false if loading throws.
26
+ }
27
+ //# sourceMappingURL=embedder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAIvE,MAAM,WAAW,wBAAwB;IACvC,0DAA0D;IAC1D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC;AAwBD,qBAAa,iBAAkB,YAAW,iBAAiB;IACzD,QAAQ,CAAC,IAAI,eAAe;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,kBAAkB,OAAO;IAElC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAU;IAC/C,OAAO,CAAC,MAAM,CAA+B;IAC7C,OAAO,CAAC,MAAM,CAAuB;gBAEzB,OAAO,GAAE,wBAA6B;IAQlD,SAAS,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;YAIjB,QAAQ;IAsChB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAahD,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC;CAQtC"}
@@ -0,0 +1,84 @@
1
+ import { mkdir } from "fs/promises";
2
+ import path from "path";
3
+ const DEFAULT_MODEL = "fast-bge-small-en-v1.5";
4
+ const DEFAULT_DIMENSIONS = {
5
+ "fast-bge-small-en-v1.5": 384,
6
+ "fast-bge-base-en-v1.5": 768,
7
+ "fast-multilingual-e5-large": 1024,
8
+ "fast-all-MiniLM-L6-v2": 384,
9
+ };
10
+ export class FastEmbedEmbedder {
11
+ name = "fastembed";
12
+ dimensions;
13
+ model;
14
+ preferredBatchSize = 256;
15
+ cacheDir;
16
+ showDownloadProgress;
17
+ _inner = null;
18
+ logger = null;
19
+ constructor(options = {}) {
20
+ this.model = options.model ?? DEFAULT_MODEL;
21
+ this.dimensions =
22
+ options.dimensions ?? DEFAULT_DIMENSIONS[this.model] ?? 384;
23
+ this.cacheDir = options.cacheDir;
24
+ this.showDownloadProgress = options.showDownloadProgress ?? false;
25
+ }
26
+ setLogger(logger) {
27
+ this.logger = logger.withTag("fastembed");
28
+ }
29
+ async getModel() {
30
+ if (this._inner)
31
+ return this._inner;
32
+ this.logger?.info(`Loading model ${this.model}`);
33
+ // Ensure the parent directory exists before fastembed tries to write the
34
+ // model tarball to it. fastembed creates the top-level cacheDir itself but
35
+ // not nested vendor subdirectories, so "BAAI/bge-small-en-v1.5" needs
36
+ // "<cacheDir>/BAAI/" to pre-exist. We must NOT create the model directory
37
+ // itself or fastembed skips the download and fails with "Tokenizer file
38
+ // not found". Apply whether or not cacheDir was explicitly configured —
39
+ // fastembed defaults to "local_cache" when omitted.
40
+ const effectiveCacheDir = this.cacheDir ?? "local_cache";
41
+ const modelParent = path.dirname(this.model);
42
+ if (modelParent !== ".") {
43
+ await mkdir(path.join(effectiveCacheDir, modelParent), {
44
+ recursive: true,
45
+ });
46
+ }
47
+ this.logger?.debug(`Cache dir: ${effectiveCacheDir}`);
48
+ // Lazy import — consumers must install fastembed.
49
+ // Variable specifier prevents TS from erroring when fastembed isn't installed.
50
+ const mod = "fastembed";
51
+ const { FlagEmbedding } = (await import(mod));
52
+ this._inner = await FlagEmbedding.init({
53
+ model: this.model,
54
+ cacheDir: this.cacheDir,
55
+ showDownloadProgress: this.showDownloadProgress,
56
+ });
57
+ this.logger?.info(`Model ${this.model} ready (${this.dimensions}d, batch=${this.preferredBatchSize})`);
58
+ return this._inner;
59
+ }
60
+ async embed(text) {
61
+ const [result] = await this.embedBatch([text]);
62
+ return result;
63
+ }
64
+ async embedBatch(texts) {
65
+ const model = await this.getModel();
66
+ this.logger?.verbose(`Batch ${texts.length} texts`);
67
+ this.logger?.trace(`Text lengths: ${texts.map((t) => t.length).join(", ")}`);
68
+ const results = [];
69
+ for await (const batch of model.embed(texts)) {
70
+ results.push(...batch);
71
+ }
72
+ return results;
73
+ }
74
+ async healthCheck() {
75
+ try {
76
+ await this.getModel();
77
+ return true;
78
+ }
79
+ catch {
80
+ return false;
81
+ }
82
+ }
83
+ }
84
+ //# sourceMappingURL=embedder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedder.js","sourceRoot":"","sources":["../src/embedder.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACpC,OAAO,IAAI,MAAM,MAAM,CAAC;AA0BxB,MAAM,aAAa,GAAG,wBAAwB,CAAC;AAC/C,MAAM,kBAAkB,GAA2B;IACjD,wBAAwB,EAAE,GAAG;IAC7B,uBAAuB,EAAE,GAAG;IAC5B,4BAA4B,EAAE,IAAI;IAClC,uBAAuB,EAAE,GAAG;CAC7B,CAAC;AAEF,MAAM,OAAO,iBAAiB;IACnB,IAAI,GAAG,WAAW,CAAC;IACnB,UAAU,CAAS;IACnB,KAAK,CAAS;IACd,kBAAkB,GAAG,GAAG,CAAC;IAEjB,QAAQ,CAAU;IAClB,oBAAoB,CAAU;IACvC,MAAM,GAA0B,IAAI,CAAC;IACrC,MAAM,GAAkB,IAAI,CAAC;IAErC,YAAY,UAAoC,EAAE;QAChD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,aAAa,CAAC;QAC5C,IAAI,CAAC,UAAU;YACb,OAAO,CAAC,UAAU,IAAI,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC;QAC9D,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,oBAAoB,GAAG,OAAO,CAAC,oBAAoB,IAAI,KAAK,CAAC;IACpE,CAAC;IAED,SAAS,CAAC,MAAc;QACtB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC;IAEO,KAAK,CAAC,QAAQ;QACpB,IAAI,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC,MAAM,CAAC;QAEpC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,iBAAiB,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;QAEjD,yEAAyE;QACzE,2EAA2E;QAC3E,sEAAsE;QACtE,0EAA0E;QAC1E,wEAAwE;QACxE,wEAAwE;QACxE,oDAAoD;QACpD,MAAM,iBAAiB,GAAG,IAAI,CAAC,QAAQ,IAAI,aAAa,CAAC;QACzD,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,WAAW,KAAK,GAAG,EAAE,CAAC;YACxB,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE,WAAW,CAAC,EAAE;gBACrD,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;QACL,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,cAAc,iBAAiB,EAAE,CAAC,CAAC;QAEtD,kDAAkD;QAClD,+EAA+E;QAC/E,MAAM,GAAG,GAAG,WAAW,CAAC;QACxB,MAAM,EAAE,aAAa,EAAE,GAAG,CAAC,MAAM,MAAM,CAAC,GAAG,CAAC,CAA+B,CAAC;QAC5E,IAAI,CAAC,MAAM,GAAG,MAAM,aAAa,CAAC,IAAI,CAAC;YACrC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;SAChD,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,EAAE,IAAI,CACf,SAAS,IAAI,CAAC,KAAK,WAAW,IAAI,CAAC,UAAU,YAAY,IAAI,CAAC,kBAAkB,GAAG,CACpF,CAAC;QACF,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM
,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,SAAS,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,EAAE,KAAK,CAChB,iBAAiB,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACzD,CAAC;QACF,MAAM,OAAO,GAAe,EAAE,CAAC;QAC/B,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QACzB,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,WAAW;QACf,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,5 @@
1
+ export { FastEmbedEmbedder, type FastEmbedEmbedderOptions, } from "./embedder.js";
2
+ import type { EmbeddingProvider } from "@vivantel/virage-core";
3
+ /** Factory used by the JSON config loader. Builds a FastEmbedEmbedder from the `model`, `dimensions`, `cacheDir` and `showDownloadProgress` entries of `config`. */
4
+ export declare function createEmbedder(config: Record<string, unknown>): EmbeddingProvider;
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,KAAK,wBAAwB,GAC9B,MAAM,eAAe,CAAC;AAEvB,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAM/D,8CAA8C;AAC9C,wBAAgB,cAAc,CAC5B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC9B,iBAAiB,CAanB"}
package/dist/index.js ADDED
@@ -0,0 +1,15 @@
1
+ export { FastEmbedEmbedder, } from "./embedder.js";
2
+ import { FastEmbedEmbedder, } from "./embedder.js";
3
+ /** Factory used by the JSON config loader. Copies only correctly-typed entries from `config`; anything else becomes undefined so the constructor's defaults apply. */
4
+ export function createEmbedder(config) {
5
+ const opts = {
6
+ model: typeof config.model === "string" ? config.model : undefined, // non-string values are dropped
7
+ dimensions: typeof config.dimensions === "number" ? config.dimensions : undefined, // non-number values are dropped
8
+ cacheDir: typeof config.cacheDir === "string" ? config.cacheDir : undefined, // non-string values are dropped
9
+ showDownloadProgress: typeof config.showDownloadProgress === "boolean"
10
+ ? config.showDownloadProgress
11
+ : undefined,
12
+ };
13
+ return new FastEmbedEmbedder(opts);
14
+ }
15
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,GAElB,MAAM,eAAe,CAAC;AAGvB,OAAO,EACL,iBAAiB,GAElB,MAAM,eAAe,CAAC;AAEvB,8CAA8C;AAC9C,MAAM,UAAU,cAAc,CAC5B,MAA+B;IAE/B,MAAM,IAAI,GAA6B;QACrC,KAAK,EAAE,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;QAClE,UAAU,EACR,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;QACvE,QAAQ,EAAE,OAAO,MAAM,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;QAC3E,oBAAoB,EAClB,OAAO,MAAM,CAAC,oBAAoB,KAAK,SAAS;YAC9C,CAAC,CAAC,MAAM,CAAC,oBAAoB;YAC7B,CAAC,CAAC,SAAS;KAChB,CAAC;IAEF,OAAO,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC;AACrC,CAAC"}
package/package.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "name": "@vivantel/virage-embedder-fastembed",
3
+ "version": "0.2.0",
4
+ "description": "FastEmbed ONNX-based local embedding provider for @vivantel/virage-core",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "types": "./dist/index.d.ts"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist",
16
+ "README.md"
17
+ ],
18
+ "sideEffects": false,
19
+ "publishConfig": {
20
+ "access": "public"
21
+ },
22
+ "scripts": {
23
+ "build": "tsc",
24
+ "type-check": "tsc --noEmit",
25
+ "test": "vitest run",
26
+ "prepublishOnly": "npm run build",
27
+ "lint": "eslint src/",
28
+ "lint:fix": "eslint src/ --fix",
29
+ "format": "prettier --write \"src/**/*.ts\"",
30
+ "fix": "npm run lint:fix && npm run format"
31
+ },
32
+ "keywords": [
33
+ "rag",
34
+ "embeddings",
35
+ "fastembed",
36
+ "onnx",
37
+ "local",
38
+ "offline"
39
+ ],
40
+ "author": "Vivantel",
41
+ "license": "MIT",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "https://github.com/vivantel/virage",
45
+ "directory": "packages/virage-embedder-fastembed"
46
+ },
47
+ "dependencies": {
48
+ "fastembed": "^2.1.0"
49
+ },
50
+ "peerDependencies": {
51
+ "@vivantel/virage-core": "*"
52
+ },
53
+ "devDependencies": {
54
+ "@types/node": "^25.9.1",
55
+ "@vivantel/virage-core": "0.2.0",
56
+ "typescript": "^6.0.3",
57
+ "vitest": "^4.1.8"
58
+ },
59
+ "engines": {
60
+ "node": ">=18.0.0"
61
+ }
62
+ }