embedded-raptor 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -13
- package/dist/cli.cjs +1 -1
- package/dist/cli.mjs +1 -1
- package/dist/engine-CTvg_66e.cjs +404 -0
- package/dist/engine-ez6nFONK.mjs +365 -0
- package/dist/index.cjs +1 -1
- package/dist/index.mjs +1 -1
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
# Raptor
|
|
1
|
+
# Embedded Raptor
|
|
2
2
|
|
|
3
|
-
[](https://github.com/artmann/raptor/actions/workflows/ci.yml)
|
|
4
|
-
[](https://www.npmjs.com/package/raptor)
|
|
5
|
-
[](https://www.npmjs.com/package/raptor)
|
|
3
|
+
[](https://github.com/artmann/embedded-raptor/actions/workflows/ci.yml)
|
|
4
|
+
[](https://www.npmjs.com/package/embedded-raptor)
|
|
5
|
+
[](https://www.npmjs.com/package/embedded-raptor)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
[](https://www.typescriptlang.org/)
|
|
8
8
|
|
|
9
9
|
> A lightweight semantic search database with text embeddings for Node.js and
|
|
10
10
|
> Bun
|
|
11
11
|
|
|
12
|
-
Raptor lets you build semantic search into your applications with just a few
|
|
12
|
+
Embedded Raptor lets you build semantic search into your applications with just a few
|
|
13
13
|
lines of code. Store text, search by meaning, and find similar content—perfect
|
|
14
14
|
for RAG systems, chatbots, and recommendation engines.
|
|
15
15
|
|
|
16
|
-
## What is Raptor?
|
|
16
|
+
## What is Embedded Raptor?
|
|
17
17
|
|
|
18
|
-
Raptor is an embedding database that automatically converts text into vector
|
|
18
|
+
Embedded Raptor is an embedding database that automatically converts text into vector
|
|
19
19
|
embeddings and stores them in an efficient binary format. Instead of searching
|
|
20
20
|
by exact keywords, you can search by semantic similarity—finding documents that
|
|
21
21
|
mean the same thing, even if they use different words.
|
|
@@ -23,7 +23,7 @@ mean the same thing, even if they use different words.
|
|
|
23
23
|
**Example:** Search for "how to reset password" and find results like "forgot my
|
|
24
24
|
login credentials" or "change account password".
|
|
25
25
|
|
|
26
|
-
## Why Raptor?
|
|
26
|
+
## Why Embedded Raptor?
|
|
27
27
|
|
|
28
28
|
- **Simple API** - No complex setup, just store and search
|
|
29
29
|
- **Semantic Search** - Find content by meaning, not just keywords
|
|
@@ -44,10 +44,10 @@ login credentials" or "change account password".
|
|
|
44
44
|
|
|
45
45
|
```bash
|
|
46
46
|
# Using npm
|
|
47
|
-
npm install raptor
|
|
47
|
+
npm install embedded-raptor
|
|
48
48
|
|
|
49
49
|
# Using bun
|
|
50
|
-
bun add raptor
|
|
50
|
+
bun add embedded-raptor
|
|
51
51
|
```
|
|
52
52
|
|
|
53
53
|
## Quick Start
|
|
@@ -55,7 +55,7 @@ bun add raptor
|
|
|
55
55
|
### Programmatic API
|
|
56
56
|
|
|
57
57
|
```typescript
|
|
58
|
-
import { EmbeddingEngine } from 'raptor'
|
|
58
|
+
import { EmbeddingEngine } from 'embedded-raptor'
|
|
59
59
|
|
|
60
60
|
const engine = new EmbeddingEngine({
|
|
61
61
|
storePath: './my-database.raptor'
|
|
@@ -203,11 +203,11 @@ raptor store key1 "Some text" --storePath ./data/custom.raptor
|
|
|
203
203
|
|
|
204
204
|
## How It Works
|
|
205
205
|
|
|
206
|
-
1. **Text → Embeddings**: Raptor uses the BGE-Base-EN model to convert text into
|
|
206
|
+
1. **Text → Embeddings**: Embedded Raptor uses the BGE-Base-EN model to convert text into
|
|
207
207
|
768-dimensional vector embeddings
|
|
208
208
|
2. **Storage**: Embeddings are stored in an efficient binary format (.raptor
|
|
209
209
|
files)
|
|
210
|
-
3. **Search**: When you search, Raptor compares your query embedding against all
|
|
210
|
+
3. **Search**: When you search, Embedded Raptor compares your query embedding against all
|
|
211
211
|
stored embeddings using cosine similarity
|
|
212
212
|
4. **Results**: Returns the most similar results ranked by similarity score
|
|
213
213
|
|
package/dist/cli.cjs
CHANGED
package/dist/cli.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { t as EmbeddingEngine } from "./engine-
|
|
2
|
+
import { t as EmbeddingEngine } from "./engine-ez6nFONK.mjs";
|
|
3
3
|
import { readFileSync } from "node:fs";
|
|
4
4
|
import { dirname, resolve } from "node:path";
|
|
5
5
|
import { cli, command } from "cleye";
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
//#region rolldown:runtime
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __copyProps = (to, from, except, desc) => {
|
|
9
|
+
if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
|
|
10
|
+
key = keys[i];
|
|
11
|
+
if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, {
|
|
12
|
+
get: ((k) => from[k]).bind(null, key),
|
|
13
|
+
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", {
|
|
19
|
+
value: mod,
|
|
20
|
+
enumerable: true
|
|
21
|
+
}) : target, mod));
|
|
22
|
+
|
|
23
|
+
//#endregion
|
|
24
|
+
let __xenova_transformers = require("@xenova/transformers");
|
|
25
|
+
__xenova_transformers = __toESM(__xenova_transformers);
|
|
26
|
+
let node_fs = require("node:fs");
|
|
27
|
+
node_fs = __toESM(node_fs);
|
|
28
|
+
let node_fs_promises = require("node:fs/promises");
|
|
29
|
+
node_fs_promises = __toESM(node_fs_promises);
|
|
30
|
+
let node_path = require("node:path");
|
|
31
|
+
node_path = __toESM(node_path);
|
|
32
|
+
let tiny_invariant = require("tiny-invariant");
|
|
33
|
+
tiny_invariant = __toESM(tiny_invariant);
|
|
34
|
+
|
|
35
|
+
//#region src/binary-format.ts
|
|
36
|
+
const magicBytes = "EMBD";
|
|
37
|
+
const currentVersion = 1;
|
|
38
|
+
const headerSize = 16;
|
|
39
|
+
function calculateRecordLength(keyLength, dimension) {
|
|
40
|
+
return 2 + keyLength + dimension * 4 + 4;
|
|
41
|
+
}
|
|
42
|
+
async function writeHeader(filePath, dimension) {
|
|
43
|
+
const buffer = new ArrayBuffer(headerSize);
|
|
44
|
+
const view = new DataView(buffer);
|
|
45
|
+
for (let i = 0; i < 4; i++) view.setUint8(i, magicBytes.charCodeAt(i));
|
|
46
|
+
view.setUint16(4, currentVersion, true);
|
|
47
|
+
view.setUint32(6, dimension, true);
|
|
48
|
+
await (0, node_fs_promises.writeFile)(filePath, new Uint8Array(buffer));
|
|
49
|
+
}
|
|
50
|
+
async function readHeader(filePath) {
|
|
51
|
+
const file = await (0, node_fs_promises.open)(filePath, "r");
|
|
52
|
+
try {
|
|
53
|
+
const buffer = new ArrayBuffer(headerSize);
|
|
54
|
+
const uint8View = new Uint8Array(buffer);
|
|
55
|
+
const { bytesRead } = await file.read(uint8View, 0, headerSize, 0);
|
|
56
|
+
if (bytesRead < headerSize) throw new Error(`File too small: expected at least ${headerSize} bytes, got ${bytesRead}`);
|
|
57
|
+
const view = new DataView(buffer);
|
|
58
|
+
const magic = new Uint8Array(buffer, 0, 4);
|
|
59
|
+
const magicString = String.fromCharCode(...magic);
|
|
60
|
+
if (magicString !== magicBytes) throw new Error(`Invalid file format: magic bytes expected "${magicBytes}", got "${magicString}"`);
|
|
61
|
+
const version = view.getUint16(4, true);
|
|
62
|
+
if (version !== currentVersion) throw new Error(`Unsupported version: ${version}. Current version is ${currentVersion}`);
|
|
63
|
+
return {
|
|
64
|
+
version,
|
|
65
|
+
dimension: view.getUint32(6, true)
|
|
66
|
+
};
|
|
67
|
+
} finally {
|
|
68
|
+
await file.close();
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
async function writeRecord(filePath, key, embedding) {
|
|
72
|
+
const keyBytes = new TextEncoder().encode(key);
|
|
73
|
+
const keyLength = keyBytes.length;
|
|
74
|
+
const dimension = embedding.length;
|
|
75
|
+
const recordLength = calculateRecordLength(keyLength, dimension);
|
|
76
|
+
const buffer = new ArrayBuffer(recordLength);
|
|
77
|
+
const view = new DataView(buffer);
|
|
78
|
+
const uint8View = new Uint8Array(buffer);
|
|
79
|
+
let offset = 0;
|
|
80
|
+
view.setUint16(offset, keyLength, true);
|
|
81
|
+
offset += 2;
|
|
82
|
+
uint8View.set(keyBytes, offset);
|
|
83
|
+
offset += keyLength;
|
|
84
|
+
for (let i = 0; i < dimension; i++) {
|
|
85
|
+
view.setFloat32(offset, embedding[i], true);
|
|
86
|
+
offset += 4;
|
|
87
|
+
}
|
|
88
|
+
view.setUint32(offset, recordLength, true);
|
|
89
|
+
await (0, node_fs_promises.appendFile)(filePath, uint8View);
|
|
90
|
+
}
|
|
91
|
+
async function writeRecords(filePath, records) {
|
|
92
|
+
if (records.length === 0) return;
|
|
93
|
+
let totalSize = 0;
|
|
94
|
+
const recordBuffers = [];
|
|
95
|
+
for (const record of records) {
|
|
96
|
+
const keyBytes = new TextEncoder().encode(record.key);
|
|
97
|
+
const keyLength = keyBytes.length;
|
|
98
|
+
const dimension = record.embedding.length;
|
|
99
|
+
const recordLength = calculateRecordLength(keyLength, dimension);
|
|
100
|
+
const buffer = new ArrayBuffer(recordLength);
|
|
101
|
+
const view = new DataView(buffer);
|
|
102
|
+
const uint8View = new Uint8Array(buffer);
|
|
103
|
+
let offset = 0;
|
|
104
|
+
view.setUint16(offset, keyLength, true);
|
|
105
|
+
offset += 2;
|
|
106
|
+
uint8View.set(keyBytes, offset);
|
|
107
|
+
offset += keyLength;
|
|
108
|
+
for (let i = 0; i < dimension; i++) {
|
|
109
|
+
view.setFloat32(offset, record.embedding[i], true);
|
|
110
|
+
offset += 4;
|
|
111
|
+
}
|
|
112
|
+
view.setUint32(offset, recordLength, true);
|
|
113
|
+
recordBuffers.push(uint8View);
|
|
114
|
+
totalSize += recordLength;
|
|
115
|
+
}
|
|
116
|
+
const combinedBuffer = new Uint8Array(totalSize);
|
|
117
|
+
let position = 0;
|
|
118
|
+
for (const buffer of recordBuffers) {
|
|
119
|
+
combinedBuffer.set(buffer, position);
|
|
120
|
+
position += buffer.length;
|
|
121
|
+
}
|
|
122
|
+
await (0, node_fs_promises.appendFile)(filePath, combinedBuffer);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
//#endregion
|
|
126
|
+
//#region src/binary-file-reader.ts
|
|
127
|
+
const chunkSize = 64 * 1024;
|
|
128
|
+
var BinaryFileReader = class {
|
|
129
|
+
constructor(storePath) {
|
|
130
|
+
this.storePath = storePath;
|
|
131
|
+
}
|
|
132
|
+
async *entries() {
|
|
133
|
+
const dimension = (await readHeader(this.storePath)).dimension;
|
|
134
|
+
const fileSize = (await (0, node_fs_promises.stat)(this.storePath)).size;
|
|
135
|
+
if (fileSize <= 16) return;
|
|
136
|
+
const seenKeys = /* @__PURE__ */ new Set();
|
|
137
|
+
const file = await (0, node_fs_promises.open)(this.storePath, "r");
|
|
138
|
+
try {
|
|
139
|
+
let currentPosition = fileSize;
|
|
140
|
+
while (currentPosition > 16) {
|
|
141
|
+
const chunkStart = Math.max(16, currentPosition - chunkSize);
|
|
142
|
+
const bytesToRead = currentPosition - chunkStart;
|
|
143
|
+
const buffer = new ArrayBuffer(bytesToRead);
|
|
144
|
+
const uint8View = new Uint8Array(buffer);
|
|
145
|
+
await file.read(uint8View, 0, bytesToRead, chunkStart);
|
|
146
|
+
let chunkPosition = bytesToRead;
|
|
147
|
+
while (chunkPosition > 0) {
|
|
148
|
+
if (chunkPosition < 4) break;
|
|
149
|
+
const recordLength = new DataView(buffer).getUint32(chunkPosition - 4, true);
|
|
150
|
+
if (recordLength > chunkPosition) {
|
|
151
|
+
const recordStart = chunkStart + chunkPosition - recordLength;
|
|
152
|
+
const recordBuffer = new ArrayBuffer(recordLength);
|
|
153
|
+
const recordUint8View = new Uint8Array(recordBuffer);
|
|
154
|
+
await file.read(recordUint8View, 0, recordLength, recordStart);
|
|
155
|
+
const record = this.parseRecord(recordBuffer, dimension);
|
|
156
|
+
if (record && !seenKeys.has(record.key)) {
|
|
157
|
+
seenKeys.add(record.key);
|
|
158
|
+
yield record;
|
|
159
|
+
}
|
|
160
|
+
chunkPosition -= recordLength;
|
|
161
|
+
} else {
|
|
162
|
+
const recordStart = chunkPosition - recordLength;
|
|
163
|
+
const recordView = new DataView(buffer, recordStart, recordLength);
|
|
164
|
+
const record = this.parseRecordFromView(recordView, dimension, recordLength);
|
|
165
|
+
if (record && !seenKeys.has(record.key)) {
|
|
166
|
+
seenKeys.add(record.key);
|
|
167
|
+
yield record;
|
|
168
|
+
}
|
|
169
|
+
chunkPosition -= recordLength;
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
currentPosition = chunkStart + chunkPosition;
|
|
173
|
+
}
|
|
174
|
+
} finally {
|
|
175
|
+
await file.close();
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
parseRecord(buffer, dimension) {
|
|
179
|
+
const view = new DataView(buffer);
|
|
180
|
+
return this.parseRecordFromView(view, dimension, buffer.byteLength);
|
|
181
|
+
}
|
|
182
|
+
parseRecordFromView(view, dimension, _recordLength) {
|
|
183
|
+
try {
|
|
184
|
+
let offset = 0;
|
|
185
|
+
const keyLength = view.getUint16(offset, true);
|
|
186
|
+
offset += 2;
|
|
187
|
+
const keyBytes = new Uint8Array(view.buffer, view.byteOffset + offset, keyLength);
|
|
188
|
+
const key = new TextDecoder().decode(keyBytes);
|
|
189
|
+
offset += keyLength;
|
|
190
|
+
const embedding = new Float32Array(dimension);
|
|
191
|
+
for (let i = 0; i < dimension; i++) {
|
|
192
|
+
embedding[i] = view.getFloat32(offset, true);
|
|
193
|
+
offset += 4;
|
|
194
|
+
}
|
|
195
|
+
return {
|
|
196
|
+
key,
|
|
197
|
+
text: "",
|
|
198
|
+
embedding: Array.from(embedding),
|
|
199
|
+
timestamp: 0
|
|
200
|
+
};
|
|
201
|
+
} catch {
|
|
202
|
+
return null;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
//#endregion
|
|
208
|
+
//#region src/candidate-set.ts
|
|
209
|
+
var CandidateSet = class {
|
|
210
|
+
size;
|
|
211
|
+
entries = [];
|
|
212
|
+
constructor(size = 5) {
|
|
213
|
+
(0, tiny_invariant.default)(size > 0, "Size must be a positive integer.");
|
|
214
|
+
this.size = size;
|
|
215
|
+
}
|
|
216
|
+
add(key, value) {
|
|
217
|
+
(0, tiny_invariant.default)(key, "Key must be provided.");
|
|
218
|
+
(0, tiny_invariant.default)(value, "Value must be provided.");
|
|
219
|
+
if (this.entries.length < this.size) {
|
|
220
|
+
this.entries.push(new CandidateSetEntry(key, value));
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
let minIndex = 0;
|
|
224
|
+
let minValue = this.entries[0].value;
|
|
225
|
+
for (let i = 1; i < this.entries.length; i++) if (this.entries[i].value < minValue) {
|
|
226
|
+
minValue = this.entries[i].value;
|
|
227
|
+
minIndex = i;
|
|
228
|
+
}
|
|
229
|
+
if (value > minValue) this.entries[minIndex] = new CandidateSetEntry(key, value);
|
|
230
|
+
}
|
|
231
|
+
count() {
|
|
232
|
+
return this.entries.length;
|
|
233
|
+
}
|
|
234
|
+
getEntries() {
|
|
235
|
+
return this.entries.slice().sort((a, b) => b.value - a.value);
|
|
236
|
+
}
|
|
237
|
+
getKeys() {
|
|
238
|
+
return this.getEntries().map((entry) => entry.key);
|
|
239
|
+
}
|
|
240
|
+
};
|
|
241
|
+
var CandidateSetEntry = class {
|
|
242
|
+
key;
|
|
243
|
+
value;
|
|
244
|
+
constructor(key, value) {
|
|
245
|
+
this.key = key;
|
|
246
|
+
this.value = value;
|
|
247
|
+
}
|
|
248
|
+
};
|
|
249
|
+
|
|
250
|
+
//#endregion
|
|
251
|
+
//#region src/engine.ts
|
|
252
|
+
var EmbeddingEngine = class {
|
|
253
|
+
fileReader;
|
|
254
|
+
storePath;
|
|
255
|
+
extractor;
|
|
256
|
+
constructor(options) {
|
|
257
|
+
this.storePath = options.storePath;
|
|
258
|
+
this.fileReader = new BinaryFileReader(options.storePath);
|
|
259
|
+
__xenova_transformers.env.cacheDir = "./.cache";
|
|
260
|
+
}
|
|
261
|
+
/**
|
|
262
|
+
* Gets or initializes the embedding model
|
|
263
|
+
* Caches the model instance to avoid repeated initialization overhead
|
|
264
|
+
* @returns Initialized feature extraction pipeline
|
|
265
|
+
*/
|
|
266
|
+
async getOrInitModel() {
|
|
267
|
+
this.extractor ??= await (0, __xenova_transformers.pipeline)("feature-extraction", "Xenova/bge-small-en-v1.5");
|
|
268
|
+
return this.extractor;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* Generates embedding from text using Transformers.js bge-small-en-v1.5 model
|
|
272
|
+
* @param text - Text to embed
|
|
273
|
+
* @returns 384-dimensional embedding vector (normalized)
|
|
274
|
+
*/
|
|
275
|
+
async generateEmbedding(text) {
|
|
276
|
+
const output = await (await this.getOrInitModel())(text, {
|
|
277
|
+
pooling: "mean",
|
|
278
|
+
normalize: true
|
|
279
|
+
});
|
|
280
|
+
return Array.from(output.data);
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* Retrieves an embedding entry by key
|
|
284
|
+
* Reads the file in reverse order for efficiency (most recent first)
|
|
285
|
+
* @param key - Unique identifier for the entry
|
|
286
|
+
* @returns The embedding entry, or null if not found
|
|
287
|
+
*/
|
|
288
|
+
async get(key) {
|
|
289
|
+
(0, tiny_invariant.default)(key, "Key must be provided.");
|
|
290
|
+
if (!(0, node_fs.existsSync)(this.storePath)) return null;
|
|
291
|
+
for await (const entry of this.fileReader.entries()) if (entry.key === key) return entry;
|
|
292
|
+
return null;
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* Searches for similar embeddings using cosine similarity
|
|
296
|
+
* @param query - Text query to search for
|
|
297
|
+
* @param limit - Maximum number of results to return (default: 10)
|
|
298
|
+
* @param minSimilarity - Minimum similarity threshold (default: 0, range: -1 to 1)
|
|
299
|
+
* @returns Array of search results sorted by similarity (highest first)
|
|
300
|
+
*/
|
|
301
|
+
async search(query, limit = 10, minSimilarity = .5) {
|
|
302
|
+
(0, tiny_invariant.default)(query, "Query text must be provided.");
|
|
303
|
+
(0, tiny_invariant.default)(limit > 0, "Limit must be a positive integer.");
|
|
304
|
+
(0, tiny_invariant.default)(minSimilarity >= 0 && minSimilarity <= 1, "minSimilarity must be between 0 and 1.");
|
|
305
|
+
if (!(0, node_fs.existsSync)(this.storePath)) return [];
|
|
306
|
+
const queryEmbedding = await this.generateEmbedding(query);
|
|
307
|
+
const candidateSet = new CandidateSet(limit);
|
|
308
|
+
for await (const entry of this.fileReader.entries()) {
|
|
309
|
+
const similarity = this.cosineSimilarity(queryEmbedding, entry.embedding);
|
|
310
|
+
if (similarity < minSimilarity) continue;
|
|
311
|
+
candidateSet.add(entry.key, similarity);
|
|
312
|
+
}
|
|
313
|
+
return candidateSet.getEntries().map((entry) => ({
|
|
314
|
+
key: entry.key,
|
|
315
|
+
similarity: entry.value
|
|
316
|
+
}));
|
|
317
|
+
}
|
|
318
|
+
/**
|
|
319
|
+
* Stores a text embedding in the binary append-only file
|
|
320
|
+
* Creates header on first write
|
|
321
|
+
* @param key - Unique identifier for this entry
|
|
322
|
+
* @param text - Text to embed and store
|
|
323
|
+
*/
|
|
324
|
+
async store(key, text) {
|
|
325
|
+
const embedding = await this.generateEmbedding(text);
|
|
326
|
+
const embeddingFloat32 = new Float32Array(embedding);
|
|
327
|
+
await (0, node_fs_promises.mkdir)((0, node_path.dirname)(this.storePath), { recursive: true });
|
|
328
|
+
if (!(0, node_fs.existsSync)(this.storePath)) await writeHeader(this.storePath, embedding.length);
|
|
329
|
+
await writeRecord(this.storePath, key, embeddingFloat32);
|
|
330
|
+
}
|
|
331
|
+
/**
|
|
332
|
+
* Stores multiple text embeddings in batch
|
|
333
|
+
* More efficient than calling store() multiple times
|
|
334
|
+
* Generates embeddings in a single batch and writes all records at once
|
|
335
|
+
* @param items - Array of {key, text} objects to store
|
|
336
|
+
*/
|
|
337
|
+
async storeMany(items) {
|
|
338
|
+
(0, tiny_invariant.default)(items.length > 0, "Items array must not be empty.");
|
|
339
|
+
const texts = items.map((item) => item.text);
|
|
340
|
+
const output = await (await this.getOrInitModel())(texts, {
|
|
341
|
+
pooling: "mean",
|
|
342
|
+
normalize: true
|
|
343
|
+
});
|
|
344
|
+
const batchSize = output.dims[0];
|
|
345
|
+
const embeddingDim = output.dims[1];
|
|
346
|
+
const embeddingsList = [];
|
|
347
|
+
for (let i = 0; i < batchSize; i++) {
|
|
348
|
+
const start = i * embeddingDim;
|
|
349
|
+
const end = start + embeddingDim;
|
|
350
|
+
const data = Array.from(output.data);
|
|
351
|
+
embeddingsList.push(data.slice(start, end));
|
|
352
|
+
}
|
|
353
|
+
(0, tiny_invariant.default)(embeddingsList.length === items.length, "Number of embeddings must match number of items.");
|
|
354
|
+
await (0, node_fs_promises.mkdir)((0, node_path.dirname)(this.storePath), { recursive: true });
|
|
355
|
+
if (!(0, node_fs.existsSync)(this.storePath)) await writeHeader(this.storePath, embeddingsList[0].length);
|
|
356
|
+
const records = items.map((item, index) => ({
|
|
357
|
+
key: item.key,
|
|
358
|
+
embedding: new Float32Array(embeddingsList[index])
|
|
359
|
+
}));
|
|
360
|
+
await writeRecords(this.storePath, records);
|
|
361
|
+
}
|
|
362
|
+
/**
|
|
363
|
+
* Calculates cosine similarity between two embeddings
|
|
364
|
+
* @param a - First embedding vector
|
|
365
|
+
* @param b - Second embedding vector
|
|
366
|
+
* @returns Cosine similarity score between -1 and 1 (1 = identical, -1 = opposite)
|
|
367
|
+
*/
|
|
368
|
+
cosineSimilarity(a, b) {
|
|
369
|
+
if (a.length !== b.length) throw new Error("Embeddings must have the same dimensions");
|
|
370
|
+
let dotProduct = 0;
|
|
371
|
+
let magnitudeA = 0;
|
|
372
|
+
let magnitudeB = 0;
|
|
373
|
+
for (let i = 0; i < a.length; i++) {
|
|
374
|
+
dotProduct += a[i] * b[i];
|
|
375
|
+
magnitudeA += a[i] * a[i];
|
|
376
|
+
magnitudeB += b[i] * b[i];
|
|
377
|
+
}
|
|
378
|
+
magnitudeA = Math.sqrt(magnitudeA);
|
|
379
|
+
magnitudeB = Math.sqrt(magnitudeB);
|
|
380
|
+
if (magnitudeA === 0 || magnitudeB === 0) return 0;
|
|
381
|
+
return dotProduct / (magnitudeA * magnitudeB);
|
|
382
|
+
}
|
|
383
|
+
/**
|
|
384
|
+
* Disposes of the cached embedding model and releases resources
|
|
385
|
+
* Call this when you're done using the engine to free up memory
|
|
386
|
+
*/
|
|
387
|
+
dispose() {
|
|
388
|
+
this.extractor = void 0;
|
|
389
|
+
}
|
|
390
|
+
};
|
|
391
|
+
|
|
392
|
+
//#endregion
|
|
393
|
+
Object.defineProperty(exports, 'EmbeddingEngine', {
|
|
394
|
+
enumerable: true,
|
|
395
|
+
get: function () {
|
|
396
|
+
return EmbeddingEngine;
|
|
397
|
+
}
|
|
398
|
+
});
|
|
399
|
+
Object.defineProperty(exports, '__toESM', {
|
|
400
|
+
enumerable: true,
|
|
401
|
+
get: function () {
|
|
402
|
+
return __toESM;
|
|
403
|
+
}
|
|
404
|
+
});
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
import { env, pipeline } from "@xenova/transformers";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { appendFile, mkdir, open, stat, writeFile } from "node:fs/promises";
|
|
4
|
+
import { dirname } from "node:path";
|
|
5
|
+
import invariant from "tiny-invariant";
|
|
6
|
+
|
|
7
|
+
//#region src/binary-format.ts
|
|
8
|
+
const magicBytes = "EMBD";
|
|
9
|
+
const currentVersion = 1;
|
|
10
|
+
const headerSize = 16;
|
|
11
|
+
function calculateRecordLength(keyLength, dimension) {
|
|
12
|
+
return 2 + keyLength + dimension * 4 + 4;
|
|
13
|
+
}
|
|
14
|
+
async function writeHeader(filePath, dimension) {
|
|
15
|
+
const buffer = new ArrayBuffer(headerSize);
|
|
16
|
+
const view = new DataView(buffer);
|
|
17
|
+
for (let i = 0; i < 4; i++) view.setUint8(i, magicBytes.charCodeAt(i));
|
|
18
|
+
view.setUint16(4, currentVersion, true);
|
|
19
|
+
view.setUint32(6, dimension, true);
|
|
20
|
+
await writeFile(filePath, new Uint8Array(buffer));
|
|
21
|
+
}
|
|
22
|
+
async function readHeader(filePath) {
|
|
23
|
+
const file = await open(filePath, "r");
|
|
24
|
+
try {
|
|
25
|
+
const buffer = new ArrayBuffer(headerSize);
|
|
26
|
+
const uint8View = new Uint8Array(buffer);
|
|
27
|
+
const { bytesRead } = await file.read(uint8View, 0, headerSize, 0);
|
|
28
|
+
if (bytesRead < headerSize) throw new Error(`File too small: expected at least ${headerSize} bytes, got ${bytesRead}`);
|
|
29
|
+
const view = new DataView(buffer);
|
|
30
|
+
const magic = new Uint8Array(buffer, 0, 4);
|
|
31
|
+
const magicString = String.fromCharCode(...magic);
|
|
32
|
+
if (magicString !== magicBytes) throw new Error(`Invalid file format: magic bytes expected "${magicBytes}", got "${magicString}"`);
|
|
33
|
+
const version = view.getUint16(4, true);
|
|
34
|
+
if (version !== currentVersion) throw new Error(`Unsupported version: ${version}. Current version is ${currentVersion}`);
|
|
35
|
+
return {
|
|
36
|
+
version,
|
|
37
|
+
dimension: view.getUint32(6, true)
|
|
38
|
+
};
|
|
39
|
+
} finally {
|
|
40
|
+
await file.close();
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
async function writeRecord(filePath, key, embedding) {
|
|
44
|
+
const keyBytes = new TextEncoder().encode(key);
|
|
45
|
+
const keyLength = keyBytes.length;
|
|
46
|
+
const dimension = embedding.length;
|
|
47
|
+
const recordLength = calculateRecordLength(keyLength, dimension);
|
|
48
|
+
const buffer = new ArrayBuffer(recordLength);
|
|
49
|
+
const view = new DataView(buffer);
|
|
50
|
+
const uint8View = new Uint8Array(buffer);
|
|
51
|
+
let offset = 0;
|
|
52
|
+
view.setUint16(offset, keyLength, true);
|
|
53
|
+
offset += 2;
|
|
54
|
+
uint8View.set(keyBytes, offset);
|
|
55
|
+
offset += keyLength;
|
|
56
|
+
for (let i = 0; i < dimension; i++) {
|
|
57
|
+
view.setFloat32(offset, embedding[i], true);
|
|
58
|
+
offset += 4;
|
|
59
|
+
}
|
|
60
|
+
view.setUint32(offset, recordLength, true);
|
|
61
|
+
await appendFile(filePath, uint8View);
|
|
62
|
+
}
|
|
63
|
+
async function writeRecords(filePath, records) {
|
|
64
|
+
if (records.length === 0) return;
|
|
65
|
+
let totalSize = 0;
|
|
66
|
+
const recordBuffers = [];
|
|
67
|
+
for (const record of records) {
|
|
68
|
+
const keyBytes = new TextEncoder().encode(record.key);
|
|
69
|
+
const keyLength = keyBytes.length;
|
|
70
|
+
const dimension = record.embedding.length;
|
|
71
|
+
const recordLength = calculateRecordLength(keyLength, dimension);
|
|
72
|
+
const buffer = new ArrayBuffer(recordLength);
|
|
73
|
+
const view = new DataView(buffer);
|
|
74
|
+
const uint8View = new Uint8Array(buffer);
|
|
75
|
+
let offset = 0;
|
|
76
|
+
view.setUint16(offset, keyLength, true);
|
|
77
|
+
offset += 2;
|
|
78
|
+
uint8View.set(keyBytes, offset);
|
|
79
|
+
offset += keyLength;
|
|
80
|
+
for (let i = 0; i < dimension; i++) {
|
|
81
|
+
view.setFloat32(offset, record.embedding[i], true);
|
|
82
|
+
offset += 4;
|
|
83
|
+
}
|
|
84
|
+
view.setUint32(offset, recordLength, true);
|
|
85
|
+
recordBuffers.push(uint8View);
|
|
86
|
+
totalSize += recordLength;
|
|
87
|
+
}
|
|
88
|
+
const combinedBuffer = new Uint8Array(totalSize);
|
|
89
|
+
let position = 0;
|
|
90
|
+
for (const buffer of recordBuffers) {
|
|
91
|
+
combinedBuffer.set(buffer, position);
|
|
92
|
+
position += buffer.length;
|
|
93
|
+
}
|
|
94
|
+
await appendFile(filePath, combinedBuffer);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
//#endregion
|
|
98
|
+
//#region src/binary-file-reader.ts
|
|
99
|
+
const chunkSize = 64 * 1024;
|
|
100
|
+
var BinaryFileReader = class {
|
|
101
|
+
constructor(storePath) {
|
|
102
|
+
this.storePath = storePath;
|
|
103
|
+
}
|
|
104
|
+
async *entries() {
|
|
105
|
+
const dimension = (await readHeader(this.storePath)).dimension;
|
|
106
|
+
const fileSize = (await stat(this.storePath)).size;
|
|
107
|
+
if (fileSize <= 16) return;
|
|
108
|
+
const seenKeys = /* @__PURE__ */ new Set();
|
|
109
|
+
const file = await open(this.storePath, "r");
|
|
110
|
+
try {
|
|
111
|
+
let currentPosition = fileSize;
|
|
112
|
+
while (currentPosition > 16) {
|
|
113
|
+
const chunkStart = Math.max(16, currentPosition - chunkSize);
|
|
114
|
+
const bytesToRead = currentPosition - chunkStart;
|
|
115
|
+
const buffer = new ArrayBuffer(bytesToRead);
|
|
116
|
+
const uint8View = new Uint8Array(buffer);
|
|
117
|
+
await file.read(uint8View, 0, bytesToRead, chunkStart);
|
|
118
|
+
let chunkPosition = bytesToRead;
|
|
119
|
+
while (chunkPosition > 0) {
|
|
120
|
+
if (chunkPosition < 4) break;
|
|
121
|
+
const recordLength = new DataView(buffer).getUint32(chunkPosition - 4, true);
|
|
122
|
+
if (recordLength > chunkPosition) {
|
|
123
|
+
const recordStart = chunkStart + chunkPosition - recordLength;
|
|
124
|
+
const recordBuffer = new ArrayBuffer(recordLength);
|
|
125
|
+
const recordUint8View = new Uint8Array(recordBuffer);
|
|
126
|
+
await file.read(recordUint8View, 0, recordLength, recordStart);
|
|
127
|
+
const record = this.parseRecord(recordBuffer, dimension);
|
|
128
|
+
if (record && !seenKeys.has(record.key)) {
|
|
129
|
+
seenKeys.add(record.key);
|
|
130
|
+
yield record;
|
|
131
|
+
}
|
|
132
|
+
chunkPosition -= recordLength;
|
|
133
|
+
} else {
|
|
134
|
+
const recordStart = chunkPosition - recordLength;
|
|
135
|
+
const recordView = new DataView(buffer, recordStart, recordLength);
|
|
136
|
+
const record = this.parseRecordFromView(recordView, dimension, recordLength);
|
|
137
|
+
if (record && !seenKeys.has(record.key)) {
|
|
138
|
+
seenKeys.add(record.key);
|
|
139
|
+
yield record;
|
|
140
|
+
}
|
|
141
|
+
chunkPosition -= recordLength;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
currentPosition = chunkStart + chunkPosition;
|
|
145
|
+
}
|
|
146
|
+
} finally {
|
|
147
|
+
await file.close();
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
parseRecord(buffer, dimension) {
|
|
151
|
+
const view = new DataView(buffer);
|
|
152
|
+
return this.parseRecordFromView(view, dimension, buffer.byteLength);
|
|
153
|
+
}
|
|
154
|
+
parseRecordFromView(view, dimension, _recordLength) {
|
|
155
|
+
try {
|
|
156
|
+
let offset = 0;
|
|
157
|
+
const keyLength = view.getUint16(offset, true);
|
|
158
|
+
offset += 2;
|
|
159
|
+
const keyBytes = new Uint8Array(view.buffer, view.byteOffset + offset, keyLength);
|
|
160
|
+
const key = new TextDecoder().decode(keyBytes);
|
|
161
|
+
offset += keyLength;
|
|
162
|
+
const embedding = new Float32Array(dimension);
|
|
163
|
+
for (let i = 0; i < dimension; i++) {
|
|
164
|
+
embedding[i] = view.getFloat32(offset, true);
|
|
165
|
+
offset += 4;
|
|
166
|
+
}
|
|
167
|
+
return {
|
|
168
|
+
key,
|
|
169
|
+
text: "",
|
|
170
|
+
embedding: Array.from(embedding),
|
|
171
|
+
timestamp: 0
|
|
172
|
+
};
|
|
173
|
+
} catch {
|
|
174
|
+
return null;
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
//#endregion
|
|
180
|
+
//#region src/candidate-set.ts
|
|
181
|
+
var CandidateSet = class {
|
|
182
|
+
size;
|
|
183
|
+
entries = [];
|
|
184
|
+
constructor(size = 5) {
|
|
185
|
+
invariant(size > 0, "Size must be a positive integer.");
|
|
186
|
+
this.size = size;
|
|
187
|
+
}
|
|
188
|
+
add(key, value) {
|
|
189
|
+
invariant(key, "Key must be provided.");
|
|
190
|
+
invariant(value, "Value must be provided.");
|
|
191
|
+
if (this.entries.length < this.size) {
|
|
192
|
+
this.entries.push(new CandidateSetEntry(key, value));
|
|
193
|
+
return;
|
|
194
|
+
}
|
|
195
|
+
let minIndex = 0;
|
|
196
|
+
let minValue = this.entries[0].value;
|
|
197
|
+
for (let i = 1; i < this.entries.length; i++) if (this.entries[i].value < minValue) {
|
|
198
|
+
minValue = this.entries[i].value;
|
|
199
|
+
minIndex = i;
|
|
200
|
+
}
|
|
201
|
+
if (value > minValue) this.entries[minIndex] = new CandidateSetEntry(key, value);
|
|
202
|
+
}
|
|
203
|
+
count() {
|
|
204
|
+
return this.entries.length;
|
|
205
|
+
}
|
|
206
|
+
getEntries() {
|
|
207
|
+
return this.entries.slice().sort((a, b) => b.value - a.value);
|
|
208
|
+
}
|
|
209
|
+
getKeys() {
|
|
210
|
+
return this.getEntries().map((entry) => entry.key);
|
|
211
|
+
}
|
|
212
|
+
};
|
|
213
|
+
var CandidateSetEntry = class {
	/** Candidate identifier. */
	key;
	/** Numeric score associated with the key. */
	value;
	/**
	 * Simple pair of a candidate key and its score, as held by CandidateSet.
	 * @param key - Identifier for the candidate
	 * @param value - Numeric score for the candidate
	 */
	constructor(key, value) {
		Object.assign(this, { key, value });
	}
};
|
|
221
|
+
|
|
222
|
+
//#endregion
|
|
223
|
+
//#region src/engine.ts
|
|
224
|
+
var EmbeddingEngine = class {
	// Reader for the append-only binary embedding store.
	fileReader;
	// Filesystem path of the binary store file.
	storePath;
	// Lazily-initialized feature-extraction pipeline (see getOrInitModel).
	extractor;
	/**
	 * @param options - Engine configuration
	 * @param options.storePath - Path to the append-only binary embedding store
	 */
	constructor(options) {
		this.storePath = options.storePath;
		this.fileReader = new BinaryFileReader(options.storePath);
		// Cache model weights locally so repeated runs avoid re-downloading.
		env.cacheDir = "./.cache";
	}
	/**
	 * Gets or initializes the embedding model
	 * Caches the model instance to avoid repeated initialization overhead
	 * @returns Initialized feature extraction pipeline
	 */
	async getOrInitModel() {
		this.extractor ??= await pipeline("feature-extraction", "Xenova/bge-small-en-v1.5");
		return this.extractor;
	}
	/**
	 * Generates embedding from text using Transformers.js bge-small-en-v1.5 model
	 * @param text - Text to embed
	 * @returns 384-dimensional embedding vector (normalized)
	 */
	async generateEmbedding(text) {
		const output = await (await this.getOrInitModel())(text, {
			pooling: "mean",
			normalize: true
		});
		return Array.from(output.data);
	}
	/**
	 * Retrieves an embedding entry by key
	 * Reads the file in reverse order for efficiency (most recent first)
	 * @param key - Unique identifier for the entry
	 * @returns The embedding entry, or null if not found
	 */
	async get(key) {
		invariant(key, "Key must be provided.");
		if (!existsSync(this.storePath)) return null;
		for await (const entry of this.fileReader.entries()) if (entry.key === key) return entry;
		return null;
	}
	/**
	 * Searches for similar embeddings using cosine similarity
	 * @param query - Text query to search for
	 * @param limit - Maximum number of results to return (default: 10)
	 * @param minSimilarity - Minimum similarity threshold (default: 0.5, range: 0 to 1)
	 * @returns Array of search results sorted by similarity (highest first)
	 */
	async search(query, limit = 10, minSimilarity = .5) {
		invariant(query, "Query text must be provided.");
		invariant(limit > 0, "Limit must be a positive integer.");
		invariant(minSimilarity >= 0 && minSimilarity <= 1, "minSimilarity must be between 0 and 1.");
		if (!existsSync(this.storePath)) return [];
		const queryEmbedding = await this.generateEmbedding(query);
		// Bounded top-k set keeps only the best `limit` candidates while streaming.
		const candidateSet = new CandidateSet(limit);
		for await (const entry of this.fileReader.entries()) {
			const similarity = this.cosineSimilarity(queryEmbedding, entry.embedding);
			if (similarity < minSimilarity) continue;
			candidateSet.add(entry.key, similarity);
		}
		return candidateSet.getEntries().map((entry) => ({
			key: entry.key,
			similarity: entry.value
		}));
	}
	/**
	 * Stores a text embedding in the binary append-only file
	 * Creates header on first write
	 * @param key - Unique identifier for this entry
	 * @param text - Text to embed and store
	 */
	async store(key, text) {
		const embedding = await this.generateEmbedding(text);
		const embeddingFloat32 = new Float32Array(embedding);
		await mkdir(dirname(this.storePath), { recursive: true });
		if (!existsSync(this.storePath)) await writeHeader(this.storePath, embedding.length);
		await writeRecord(this.storePath, key, embeddingFloat32);
	}
	/**
	 * Stores multiple text embeddings in batch
	 * More efficient than calling store() multiple times
	 * Generates embeddings in a single batch and writes all records at once
	 * @param items - Array of {key, text} objects to store
	 */
	async storeMany(items) {
		invariant(items.length > 0, "Items array must not be empty.");
		const texts = items.map((item) => item.text);
		const output = await (await this.getOrInitModel())(texts, {
			pooling: "mean",
			normalize: true
		});
		const batchSize = output.dims[0];
		const embeddingDim = output.dims[1];
		// Convert the flat tensor buffer once, outside the loop; the previous
		// per-iteration Array.from(output.data) made this O(batchSize^2 * dim).
		const data = Array.from(output.data);
		const embeddingsList = [];
		for (let i = 0; i < batchSize; i++) {
			const start = i * embeddingDim;
			embeddingsList.push(data.slice(start, start + embeddingDim));
		}
		invariant(embeddingsList.length === items.length, "Number of embeddings must match number of items.");
		await mkdir(dirname(this.storePath), { recursive: true });
		if (!existsSync(this.storePath)) await writeHeader(this.storePath, embeddingsList[0].length);
		const records = items.map((item, index) => ({
			key: item.key,
			embedding: new Float32Array(embeddingsList[index])
		}));
		await writeRecords(this.storePath, records);
	}
	/**
	 * Calculates cosine similarity between two embeddings
	 * @param a - First embedding vector
	 * @param b - Second embedding vector
	 * @returns Cosine similarity score between -1 and 1 (1 = identical, -1 = opposite)
	 * @throws Error if the vectors differ in length
	 */
	cosineSimilarity(a, b) {
		if (a.length !== b.length) throw new Error("Embeddings must have the same dimensions");
		let dotProduct = 0;
		let magnitudeA = 0;
		let magnitudeB = 0;
		for (let i = 0; i < a.length; i++) {
			dotProduct += a[i] * b[i];
			magnitudeA += a[i] * a[i];
			magnitudeB += b[i] * b[i];
		}
		magnitudeA = Math.sqrt(magnitudeA);
		magnitudeB = Math.sqrt(magnitudeB);
		// Guard against division by zero for all-zero vectors.
		if (magnitudeA === 0 || magnitudeB === 0) return 0;
		return dotProduct / (magnitudeA * magnitudeB);
	}
	/**
	 * Disposes of the cached embedding model and releases resources
	 * Call this when you're done using the engine to free up memory
	 */
	dispose() {
		this.extractor = void 0;
	}
};
|
|
363
|
+
|
|
364
|
+
//#endregion
|
|
365
|
+
export { EmbeddingEngine as t };
|
package/dist/index.cjs
CHANGED
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "embedded-raptor",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Lightweight semantic search database with text embeddings for Node.js and Bun",
|
|
5
5
|
"author": "Christoffer Artmann <artgaard@gmail.com>",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
9
|
-
"url": "https://github.com/artmann/raptor.git"
|
|
9
|
+
"url": "https://github.com/artmann/embedded-raptor.git"
|
|
10
10
|
},
|
|
11
|
-
"homepage": "https://github.com/artmann/raptor#readme",
|
|
11
|
+
"homepage": "https://github.com/artmann/embedded-raptor#readme",
|
|
12
12
|
"bugs": {
|
|
13
|
-
"url": "https://github.com/artmann/raptor/issues"
|
|
13
|
+
"url": "https://github.com/artmann/embedded-raptor/issues"
|
|
14
14
|
},
|
|
15
15
|
"keywords": [
|
|
16
16
|
"embeddings",
|