expo-vector-search 0.4.1 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -11
- package/android/build.gradle +3 -3
- package/build/src/ExpoVectorSearchModule.d.ts +37 -7
- package/build/src/ExpoVectorSearchModule.d.ts.map +1 -1
- package/build/src/ExpoVectorSearchModule.js +28 -6
- package/build/src/ExpoVectorSearchModule.js.map +1 -1
- package/cpp/ExpoVectorSearch.h +220 -86
- package/package.json +1 -1
- package/src/ExpoVectorSearchModule.ts +60 -9
package/README.md
CHANGED
|
@@ -110,10 +110,11 @@ Inserts a vector into the index.
|
|
|
110
110
|
- `key`: A unique numeric identifier.
|
|
111
111
|
- `vector`: A `Float32Array` containing the embeddings.
|
|
112
112
|
|
|
113
|
-
#### `addBatch(keys: Int32Array, vectors: Float32Array):
|
|
114
|
-
High-performance batch insertion.
|
|
113
|
+
#### `async addBatch(keys: Int32Array, vectors: Float32Array): Promise<VectorAddBatchResult>`
|
|
114
|
+
High-performance **asynchronous** batch insertion. Runs in a background thread to prevent UI freezing.
|
|
115
115
|
- `keys`: An `Int32Array` of unique identifiers.
|
|
116
116
|
- `vectors`: A single `Float32Array` containing all vectors concatenated (must match `keys.length * dimensions`).
|
|
117
|
+
- **Returns**: A promise resolving to `{ duration: number, count: number }`.
|
|
117
118
|
|
|
118
119
|
#### `search(vector: Float32Array, count: number, options?: SearchOptions): SearchResult[]`
|
|
119
120
|
Performs an ANN search.
|
|
@@ -140,10 +141,10 @@ Deserializes an index from a file path.
|
|
|
140
141
|
#### `delete(): void`
|
|
141
142
|
Manually releases native memory resources. The index instance becomes unusable after this call.
|
|
142
143
|
|
|
143
|
-
#### `loadVectorsFromFile(path: string):
|
|
144
|
-
|
|
144
|
+
#### `async loadVectorsFromFile(path: string): Promise<VectorLoadResult>`
|
|
145
|
+
**Asynchronously** loads raw vectors directly from a binary file into the index.
|
|
145
146
|
- `path`: Absolute path to the binary file containing packed floats.
|
|
146
|
-
- **Returns**:
|
|
147
|
+
- **Returns**: A promise resolving to `{ duration: number, count: number }`.
|
|
147
148
|
- **Note**: This is significantly faster than parsing JSON/Base64 in JavaScript and adding vectors loop by loop.
|
|
148
149
|
|
|
149
150
|
#### `getItemVector(key: number): Float32Array | undefined`
|
|
@@ -164,6 +165,12 @@ Returns the estimated memory usage of the native index in bytes.
|
|
|
164
165
|
#### `isa: string` (readonly)
|
|
165
166
|
Returns the active SIMD instruction set name (e.g., `'NEON'`, `'AVX2'`, `'SVE'`, or `'Serial'`). Useful for verifying hardware acceleration at runtime.
|
|
166
167
|
|
|
168
|
+
#### `isIndexing: boolean` (readonly)
|
|
169
|
+
Returns `true` if a background indexing operation (`addBatch` or `loadVectorsFromFile`) is currently in progress.
|
|
170
|
+
|
|
171
|
+
#### `indexingProgress: { current: number, total: number, percentage: number }` (readonly)
|
|
172
|
+
Returns real-time progress of the current background indexing operation.
|
|
173
|
+
|
|
167
174
|
## Example Usage
|
|
168
175
|
|
|
169
176
|
```typescript
|
|
@@ -180,10 +187,10 @@ index.add(1, myVector);
|
|
|
180
187
|
const query = new Float32Array(384).fill(0.48);
|
|
181
188
|
const results = index.search(query, 5);
|
|
182
189
|
|
|
183
|
-
// High-Performance Batch Insertion
|
|
190
|
+
// High-Performance Async Batch Insertion
|
|
184
191
|
const manyKeys = new Int32Array([10, 11, 12]);
|
|
185
192
|
const manyVectors = new Float32Array(384 * 3).fill(0.1);
|
|
186
|
-
index.addBatch(manyKeys, manyVectors);
|
|
193
|
+
await index.addBatch(manyKeys, manyVectors);
|
|
187
194
|
|
|
188
195
|
console.log(`Found ${results.length} neighbors`);
|
|
189
196
|
results.forEach(res => {
|
|
@@ -217,12 +224,14 @@ Recent benchmarks show that Int8 indexing is actually ~4x faster than F32 precis
|
|
|
217
224
|
### Future Roadmap
|
|
218
225
|
- [x] **Dynamic CRUD Support**: Implemented `remove(key)` and `update(key, vector)`.
|
|
219
226
|
- [x] **Metadata Filtering**: Support for `allowedKeys` filtering during search.
|
|
220
|
-
- [x] **
|
|
221
|
-
- [
|
|
222
|
-
- [
|
|
227
|
+
- [x] **Simplified React Hooks**: Abstractions like `useVectorSearch` for automatic resource management.
|
|
228
|
+
- [x] **Architecture-Specific SIMD**: Enabled NEON/AVX optimizations via SimSIMD for Android and iOS.
|
|
229
|
+
- [x] **Background Indexing**: True multithreaded ingestion to avoid JS thread locks.
|
|
223
230
|
- [x] **Extended Distance Metrics**: Support for L2, IP, Hamming, and Jaccard.
|
|
224
231
|
- [x] **USearch Upgrade**: Migration to `v2.23.0+` for enhanced performance.
|
|
225
|
-
- [ ] **
|
|
232
|
+
- [ ] **Hybrid Search**: Combine vector similarity with traditional keyword-based search.
|
|
233
|
+
- [ ] **SQLite Synchronization**: Built-in utilities to sync vector indices with `expo-sqlite`.
|
|
234
|
+
|
|
226
235
|
|
|
227
236
|
## License
|
|
228
237
|
MIT
|
package/android/build.gradle
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
apply plugin: 'com.android.library'
|
|
2
2
|
|
|
3
3
|
group = 'expo.modules.vectorsearch'
|
|
4
|
-
version = '0.
|
|
4
|
+
version = '0.5.1'
|
|
5
5
|
|
|
6
6
|
def expoModulesCorePlugin = new File(project(":expo-modules-core").projectDir.absolutePath, "ExpoModulesCorePlugin.gradle")
|
|
7
7
|
apply from: expoModulesCorePlugin
|
|
@@ -35,8 +35,8 @@ android {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
defaultConfig {
|
|
38
|
-
versionCode
|
|
39
|
-
versionName "0.
|
|
38
|
+
versionCode 3
|
|
39
|
+
versionName "0.5.1"
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
externalNativeBuild {
|
|
@@ -7,12 +7,30 @@ export interface VectorIndexOptions {
|
|
|
7
7
|
export interface SearchOptions {
|
|
8
8
|
allowedKeys?: number[] | Int32Array | Uint32Array;
|
|
9
9
|
}
|
|
10
|
+
export type AddResult = {
|
|
11
|
+
duration: number;
|
|
12
|
+
};
|
|
13
|
+
export type VectorAddBatchResult = {
|
|
14
|
+
duration: number;
|
|
15
|
+
count: number;
|
|
16
|
+
};
|
|
17
|
+
export type VectorLoadResult = {
|
|
18
|
+
duration: number;
|
|
19
|
+
count: number;
|
|
20
|
+
};
|
|
21
|
+
export type IndexingProgress = {
|
|
22
|
+
current: number;
|
|
23
|
+
total: number;
|
|
24
|
+
percentage: number;
|
|
25
|
+
};
|
|
10
26
|
interface VectorIndexHostObject {
|
|
11
27
|
dimensions: number;
|
|
12
28
|
count: number;
|
|
13
29
|
memoryUsage: number;
|
|
14
30
|
isa: string;
|
|
15
|
-
|
|
31
|
+
isIndexing: boolean;
|
|
32
|
+
indexingProgress: IndexingProgress;
|
|
33
|
+
add(key: number, vector: Vector): AddResult;
|
|
16
34
|
remove(key: number): void;
|
|
17
35
|
update(key: number, vector: Vector): void;
|
|
18
36
|
search(vector: Vector, count: number, options?: SearchOptions): SearchResult[];
|
|
@@ -20,8 +38,9 @@ interface VectorIndexHostObject {
|
|
|
20
38
|
load(path: string): void;
|
|
21
39
|
delete(): void;
|
|
22
40
|
addBatch(keys: Int32Array, vectors: Float32Array): void;
|
|
23
|
-
loadVectorsFromFile(path: string):
|
|
41
|
+
loadVectorsFromFile(path: string): void;
|
|
24
42
|
getItemVector(key: number): Float32Array | undefined;
|
|
43
|
+
getLastResult(): VectorLoadResult;
|
|
25
44
|
}
|
|
26
45
|
interface ExpoVectorSearchFactory {
|
|
27
46
|
createIndex(dimensions: number, options?: VectorIndexOptions): VectorIndexHostObject;
|
|
@@ -59,21 +78,32 @@ export declare class VectorIndex {
|
|
|
59
78
|
* The SIMD Instruction Set Architecture being used (e.g. 'neon', 'avx2', 'serial').
|
|
60
79
|
*/
|
|
61
80
|
get isa(): string;
|
|
81
|
+
/**
|
|
82
|
+
* Whether the index is currently processing an asynchronous operation (like addBatch).
|
|
83
|
+
*/
|
|
84
|
+
get isIndexing(): boolean;
|
|
85
|
+
/**
|
|
86
|
+
* The real-time progress of an ongoing indexing operation.
|
|
87
|
+
*/
|
|
88
|
+
get indexingProgress(): IndexingProgress;
|
|
62
89
|
/**
|
|
63
90
|
* Adds a vector to the index.
|
|
64
91
|
* @param key A unique numeric identifier for the vector.
|
|
65
92
|
* @param vector A Float32Array containing the vector data.
|
|
66
93
|
* @throws Error if the vector dimension doesn't match or memory allocation fails.
|
|
67
94
|
*/
|
|
68
|
-
add(key: number, vector: Vector):
|
|
95
|
+
add(key: number, vector: Vector): AddResult;
|
|
69
96
|
/**
|
|
70
97
|
* Adds multiple vectors in a single high-performance batch operation.
|
|
71
98
|
* This is significantly faster than calling `.add()` in a loop.
|
|
72
99
|
* @param keys An Int32Array of unique numeric identifiers.
|
|
73
|
-
* @param vectors A single Float32Array containing all vectors concatenated.
|
|
74
100
|
* @throws Error if buffer sizes or alignment do not match.
|
|
75
101
|
*/
|
|
76
|
-
addBatch(keys: Int32Array, vectors: Float32Array):
|
|
102
|
+
addBatch(keys: Int32Array, vectors: Float32Array): Promise<VectorAddBatchResult>;
|
|
103
|
+
/**
|
|
104
|
+
* Internal helper to poll for operation completion.
|
|
105
|
+
*/
|
|
106
|
+
private _waitForOperation;
|
|
77
107
|
/**
|
|
78
108
|
* Removes a vector from the index.
|
|
79
109
|
* @param key The unique numeric identifier of the vector to remove.
|
|
@@ -111,9 +141,9 @@ export declare class VectorIndex {
|
|
|
111
141
|
* Loads raw vectors directly from a binary file.
|
|
112
142
|
* This avoids JS parsing overhead and is much faster for initialization.
|
|
113
143
|
* @param path The absolute path to the binary file containing packed floats.
|
|
114
|
-
* @returns
|
|
144
|
+
* @returns An object containing the number of vectors loaded and the duration.
|
|
115
145
|
*/
|
|
116
|
-
loadVectorsFromFile(path: string):
|
|
146
|
+
loadVectorsFromFile(path: string): Promise<VectorLoadResult>;
|
|
117
147
|
/**
|
|
118
148
|
* Retrieves the vector associated with a specific key from the index.
|
|
119
149
|
* Useful when vectors are stored only in native memory (e.g., after loadVectorsFromFile).
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ExpoVectorSearchModule.d.ts","sourceRoot":"","sources":["../../src/ExpoVectorSearchModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAMhF,MAAM,MAAM,gBAAgB,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI,CAAC;AAEpD,MAAM,WAAW,kBAAkB;IACjC,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,MAAM,CAAC,EAAE,cAAc,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,UAAU,GAAG,WAAW,CAAC;CACnD;
|
|
1
|
+
{"version":3,"file":"ExpoVectorSearchModule.d.ts","sourceRoot":"","sources":["../../src/ExpoVectorSearchModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,0BAA0B,CAAC;AAMhF,MAAM,MAAM,gBAAgB,GAAG,KAAK,GAAG,KAAK,GAAG,IAAI,CAAC;AAEpD,MAAM,WAAW,kBAAkB;IACjC,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,MAAM,CAAC,EAAE,cAAc,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,UAAU,GAAG,WAAW,CAAC;CACnD;AAED,MAAM,MAAM,SAAS,GAAG;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAGF,UAAU,qBAAqB;IAC7B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,OAAO,CAAC;IACpB,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5C,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1C,MAAM,CACJ,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,aAAa,GACtB,YAAY,EAAE,CAAC;IAClB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,MAAM,IAAI,IAAI,CAAC;IACf,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,YAAY,GAAG,IAAI,CAAC;IACxD,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACxC,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,YAAY,GAAG,SAAS,CAAC;IACrD,aAAa,IAAI,gBAAgB,CAAC;CACnC;AAGD,UAAU,uBAAuB;IAC/B,WAAW,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,qBAAqB,CAAC;CACtF;AAED,OAAO,CAAC,MAAM,CAAC;IACb,IAAI,gBAAgB,EAAE,uBAAuB,CAAC;CAC/C;AAED;;;GAGG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,MAAM,CAAwB;IAEtC;;;;;OAKG;gBACS,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB;IAO5D;;OAEG;IACH,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED;;OAEG;IACH,IAAI,KAAK,IAAI,MAAM,CAElB;IAED;;;OAGG;IACH,IAAI,WAAW,IAAI,MAAM,CAExB;IAED;;OAEG;IACH,IAAI,GAAG,IAAI,
MAAM,CAEhB;IAED;;OAEG;IACH,IAAI,UAAU,IAAI,OAAO,CAExB;IAED;;OAEG;IACH,IAAI,gBAAgB,IAAI,gBAAgB,CAEvC;IAED;;;;;OAKG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,SAAS;IAI3C;;;;;OAKG;IACG,QAAQ,CACZ,IAAI,EAAE,UAAU,EAChB,OAAO,EAAE,YAAY,GACpB,OAAO,CAAC,oBAAoB,CAAC;IAKhC;;OAEG;YACW,iBAAiB;IAO/B;;;;OAIG;IACH,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIzB;;;;;;OAMG;IACH,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IAIzC;;;;;;;OAOG;IACH,MAAM,CACJ,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,aAAa,GACtB,YAAY,EAAE;IAIjB;;;OAGG;IACH,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAIxB;;;OAGG;IACH,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAIxB;;;;;OAKG;IACG,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAKlE;;;;;OAKG;IACH,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,YAAY,GAAG,SAAS;IAIpD;;;OAGG;IACH,MAAM,IAAI,IAAI;CAGf;AAED,eAAe,WAAW,CAAC"}
|
|
@@ -44,6 +44,18 @@ export class VectorIndex {
|
|
|
44
44
|
get isa() {
|
|
45
45
|
return this._index.isa;
|
|
46
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Whether the index is currently processing an asynchronous operation (like addBatch).
|
|
49
|
+
*/
|
|
50
|
+
get isIndexing() {
|
|
51
|
+
return this._index.isIndexing;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* The real-time progress of an ongoing indexing operation.
|
|
55
|
+
*/
|
|
56
|
+
get indexingProgress() {
|
|
57
|
+
return this._index.indexingProgress;
|
|
58
|
+
}
|
|
47
59
|
/**
|
|
48
60
|
* Adds a vector to the index.
|
|
49
61
|
* @param key A unique numeric identifier for the vector.
|
|
@@ -51,17 +63,26 @@ export class VectorIndex {
|
|
|
51
63
|
* @throws Error if the vector dimension doesn't match or memory allocation fails.
|
|
52
64
|
*/
|
|
53
65
|
add(key, vector) {
|
|
54
|
-
this._index.add(key, vector);
|
|
66
|
+
return this._index.add(key, vector);
|
|
55
67
|
}
|
|
56
68
|
/**
|
|
57
69
|
* Adds multiple vectors in a single high-performance batch operation.
|
|
58
70
|
* This is significantly faster than calling `.add()` in a loop.
|
|
59
71
|
* @param keys An Int32Array of unique numeric identifiers.
|
|
60
|
-
* @param vectors A single Float32Array containing all vectors concatenated.
|
|
61
72
|
* @throws Error if buffer sizes or alignment do not match.
|
|
62
73
|
*/
|
|
63
|
-
addBatch(keys, vectors) {
|
|
74
|
+
async addBatch(keys, vectors) {
|
|
64
75
|
this._index.addBatch(keys, vectors);
|
|
76
|
+
return this._waitForOperation();
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Internal helper to poll for operation completion.
|
|
80
|
+
*/
|
|
81
|
+
async _waitForOperation() {
|
|
82
|
+
while (this._index.isIndexing) {
|
|
83
|
+
await new Promise((resolve) => setTimeout(resolve, 50));
|
|
84
|
+
}
|
|
85
|
+
return this._index.getLastResult();
|
|
65
86
|
}
|
|
66
87
|
/**
|
|
67
88
|
* Removes a vector from the index.
|
|
@@ -110,10 +131,11 @@ export class VectorIndex {
|
|
|
110
131
|
* Loads raw vectors directly from a binary file.
|
|
111
132
|
* This avoids JS parsing overhead and is much faster for initialization.
|
|
112
133
|
* @param path The absolute path to the binary file containing packed floats.
|
|
113
|
-
* @returns
|
|
134
|
+
* @returns An object containing the number of vectors loaded and the duration.
|
|
114
135
|
*/
|
|
115
|
-
loadVectorsFromFile(path) {
|
|
116
|
-
|
|
136
|
+
async loadVectorsFromFile(path) {
|
|
137
|
+
this._index.loadVectorsFromFile(path);
|
|
138
|
+
return this._waitForOperation();
|
|
117
139
|
}
|
|
118
140
|
/**
|
|
119
141
|
* Retrieves the vector associated with a specific key from the index.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ExpoVectorSearchModule.js","sourceRoot":"","sources":["../../src/ExpoVectorSearchModule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,MAAM,CAAC;AAG3C,2EAA2E;AAC3E,mBAAmB,CAAC,kBAAkB,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"ExpoVectorSearchModule.js","sourceRoot":"","sources":["../../src/ExpoVectorSearchModule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,MAAM,CAAC;AAG3C,2EAA2E;AAC3E,mBAAmB,CAAC,kBAAkB,CAAC,CAAC;AAoExC;;;GAGG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,CAAwB;IAEtC;;;;;OAKG;IACH,YAAY,UAAkB,EAAE,OAA4B;QAC1D,IAAI,CAAC,UAAU,CAAC,gBAAgB,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,UAAU,CAAC,gBAAgB,CAAC,WAAW,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACH,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,IAAI,KAAK;QACP,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;IAC3B,CAAC;IAED;;;OAGG;IACH,IAAI,WAAW;QACb,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,IAAI,GAAG;QACL,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,IAAI,gBAAgB;QAClB,OAAO,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC;IACtC,CAAC;IAED;;;;;OAKG;IACH,GAAG,CAAC,GAAW,EAAE,MAAc;QAC7B,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACtC,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,QAAQ,CACZ,IAAgB,EAChB,OAAqB;QAErB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACpC,OAAO,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAClC,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YAC9B,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;QAC1D,CAAC;QACD,OAAO,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;IACrC,CAAC;IAED;;;;OAIG;IACH,MAAM,CAAC,GAAW;QAChB,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,GAAW,EAAE,MAAc;QAChC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAClC,CAAC;IAED;;;;;;;OAOG;IACH,MAAM,CACJ,MAAc,EACd,KAAa,EACb,OAAuB;QAEvB,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IACpD,CAAC;IAED;;;OAGG;IACH,IAAI,CAAC,IAAY;QACf,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED;;;OAGG;IACH,IAAI,CAAC,IAAY;QACf,IAAI,CAAC,MA
AM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,mBAAmB,CAAC,IAAY;QACpC,IAAI,CAAC,MAAM,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC;QACtC,OAAO,IAAI,CAAC,iBAAiB,EAAE,CAAC;IAClC,CAAC;IAED;;;;;OAKG;IACH,aAAa,CAAC,GAAW;QACvB,OAAO,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IAED;;;OAGG;IACH,MAAM;QACJ,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;IACvB,CAAC;CACF;AAED,eAAe,WAAW,CAAC","sourcesContent":["import { requireNativeModule } from 'expo';\nimport { DistanceMetric, SearchResult, Vector } from './ExpoVectorSearch.types';\n\n// The native module is loaded to ensure JSI installation occurs (OnCreate)\nrequireNativeModule('ExpoVectorSearch');\n\n// Index creation options\nexport type QuantizationMode = 'f32' | 'f16' | 'i8';\n\nexport interface VectorIndexOptions {\n quantization?: QuantizationMode;\n metric?: DistanceMetric;\n}\n\nexport interface SearchOptions {\n allowedKeys?: number[] | Int32Array | Uint32Array;\n}\n\nexport type AddResult = {\n duration: number; // in milliseconds\n};\n\nexport type VectorAddBatchResult = {\n duration: number; // in milliseconds\n count: number;\n};\n\nexport type VectorLoadResult = {\n duration: number; // in milliseconds\n count: number;\n};\n\nexport type IndexingProgress = {\n current: number;\n total: number;\n percentage: number;\n};\n\n// C++ HostObject Interface (Index Instance)\ninterface VectorIndexHostObject {\n dimensions: number;\n count: number;\n memoryUsage: number;\n isa: string;\n isIndexing: boolean;\n indexingProgress: IndexingProgress;\n add(key: number, vector: Vector): AddResult;\n remove(key: number): void;\n update(key: number, vector: Vector): void;\n search(\n vector: Vector,\n count: number,\n options?: SearchOptions\n ): SearchResult[];\n save(path: string): void;\n load(path: string): void;\n delete(): void;\n addBatch(keys: Int32Array, vectors: Float32Array): void;\n loadVectorsFromFile(path: string): void;\n getItemVector(key: number): Float32Array | undefined;\n getLastResult(): 
VectorLoadResult;\n}\n\n// Global Module Interface (Factory)\ninterface ExpoVectorSearchFactory {\n createIndex(dimensions: number, options?: VectorIndexOptions): VectorIndexHostObject;\n}\n\ndeclare global {\n var ExpoVectorSearch: ExpoVectorSearchFactory;\n}\n\n/**\n * High-performance Vector Index powered by USearch (C++ JSI).\n * Allows for ultra-fast, on-device semantic search and similarity matching.\n */\nexport class VectorIndex {\n private _index: VectorIndexHostObject;\n\n /**\n * Creates a new vector index.\n * @param dimensions The dimensionality of the vectors (e.g., 768 or 1536).\n * @param options Configuration options for the index.\n * @throws Error if the native JSI module is not available.\n */\n constructor(dimensions: number, options?: VectorIndexOptions) {\n if (!globalThis.ExpoVectorSearch) {\n throw new Error(\"ExpoVectorSearch JSI module is not available.\");\n }\n this._index = globalThis.ExpoVectorSearch.createIndex(dimensions, options);\n }\n\n /**\n * The dimensionality of the vectors in this index.\n */\n get dimensions(): number {\n return this._index.dimensions;\n }\n\n /**\n * The total number of vectors currently stored in the index.\n */\n get count(): number {\n return this._index.count;\n }\n\n /**\n * The estimated memory usage of the native index in bytes.\n * Does not include JavaScript object overhead.\n */\n get memoryUsage(): number {\n return this._index.memoryUsage;\n }\n\n /**\n * The SIMD Instruction Set Architecture being used (e.g. 
'neon', 'avx2', 'serial').\n */\n get isa(): string {\n return this._index.isa;\n }\n\n /**\n * Whether the index is currently processing an asynchronous operation (like addBatch).\n */\n get isIndexing(): boolean {\n return this._index.isIndexing;\n }\n\n /**\n * The real-time progress of an ongoing indexing operation.\n */\n get indexingProgress(): IndexingProgress {\n return this._index.indexingProgress;\n }\n\n /**\n * Adds a vector to the index.\n * @param key A unique numeric identifier for the vector.\n * @param vector A Float32Array containing the vector data.\n * @throws Error if the vector dimension doesn't match or memory allocation fails.\n */\n add(key: number, vector: Vector): AddResult {\n return this._index.add(key, vector);\n }\n\n /**\n * Adds multiple vectors in a single high-performance batch operation.\n * This is significantly faster than calling `.add()` in a loop.\n * @param keys An Int32Array of unique numeric identifiers.\n * @throws Error if buffer sizes or alignment do not match.\n */\n async addBatch(\n keys: Int32Array,\n vectors: Float32Array\n ): Promise<VectorAddBatchResult> {\n this._index.addBatch(keys, vectors);\n return this._waitForOperation();\n }\n\n /**\n * Internal helper to poll for operation completion.\n */\n private async _waitForOperation(): Promise<VectorLoadResult> {\n while (this._index.isIndexing) {\n await new Promise((resolve) => setTimeout(resolve, 50));\n }\n return this._index.getLastResult();\n }\n\n /**\n * Removes a vector from the index.\n * @param key The unique numeric identifier of the vector to remove.\n * @throws Error if the key is not found or removal fails.\n */\n remove(key: number): void {\n this._index.remove(key);\n }\n\n /**\n * Updates an existing vector in the index.\n * This is equivalent to removing the old vector and adding a new one.\n * @param key The unique numeric identifier.\n * @param vector The new vector data.\n * @throws Error if dimensions mismatch or update fails.\n */\n 
update(key: number, vector: Vector): void {\n this._index.update(key, vector);\n }\n\n /**\n * Performs an Approximate Nearest Neighbor (ANN) search.\n * @param vector The query vector.\n * @param count The number of nearest neighbors to return.\n * @param options Optional SearchOptions (e.g., allowedKeys for filtering).\n * @returns An array of SearchResult objects (key and distance).\n * @throws Error if dimensions mismatch or search fails.\n */\n search(\n vector: Vector,\n count: number,\n options?: SearchOptions\n ): SearchResult[] {\n return this._index.search(vector, count, options);\n }\n\n /**\n * Saves the index to a file.\n * @param path The absolute path to the file (e.g., in Expo.FileSystem.documentDirectory).\n */\n save(path: string): void {\n this._index.save(path);\n }\n\n /**\n * Loads the index from a file.\n * @param path The absolute path to the file.\n */\n load(path: string): void {\n this._index.load(path);\n }\n\n /**\n * Loads raw vectors directly from a binary file.\n * This avoids JS parsing overhead and is much faster for initialization.\n * @param path The absolute path to the binary file containing packed floats.\n * @returns An object containing the number of vectors loaded and the duration.\n */\n async loadVectorsFromFile(path: string): Promise<VectorLoadResult> {\n this._index.loadVectorsFromFile(path);\n return this._waitForOperation();\n }\n\n /**\n * Retrieves the vector associated with a specific key from the index.\n * Useful when vectors are stored only in native memory (e.g., after loadVectorsFromFile).\n * @param key The unique key of the item.\n * @returns The vector as a Float32Array, or undefined if not found.\n */\n getItemVector(key: number): Float32Array | undefined {\n return this._index.getItemVector(key);\n }\n\n /**\n * Explicitly releases the native memory associated with this index.\n * Once called, the index can no longer be used.\n */\n delete(): void {\n this._index.delete();\n }\n}\n\nexport default 
VectorIndex;"]}
|
package/cpp/ExpoVectorSearch.h
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#ifdef __cplusplus
|
|
4
|
+
#include <atomic>
|
|
5
|
+
#include <chrono>
|
|
4
6
|
#include <cstdio>
|
|
5
7
|
#include <cstdlib>
|
|
6
8
|
#include <fstream>
|
|
7
9
|
#include <jsi/jsi.h>
|
|
8
10
|
#include <memory>
|
|
11
|
+
#include <mutex>
|
|
9
12
|
#include <string>
|
|
10
13
|
#include <thread>
|
|
11
14
|
#include <unordered_set>
|
|
@@ -135,7 +138,15 @@ inline float jaccard_f32(const float *a, const float *b, std::size_t n,
|
|
|
135
138
|
return 1.0f - (intersection / union_count);
|
|
136
139
|
}
|
|
137
140
|
|
|
138
|
-
|
|
141
|
+
struct OperationResult {
|
|
142
|
+
double duration = 0;
|
|
143
|
+
size_t count = 0;
|
|
144
|
+
std::string error = "";
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
class VectorIndexHostObject
|
|
148
|
+
: public jsi::HostObject,
|
|
149
|
+
public std::enable_shared_from_this<VectorIndexHostObject> {
|
|
139
150
|
public:
|
|
140
151
|
using Index = index_dense_t;
|
|
141
152
|
|
|
@@ -146,20 +157,21 @@ public:
|
|
|
146
157
|
_threads = std::thread::hardware_concurrency();
|
|
147
158
|
if (_threads == 0)
|
|
148
159
|
_threads = 1;
|
|
160
|
+
_quantized = quantized;
|
|
149
161
|
|
|
150
162
|
scalar_kind_t scalar_kind =
|
|
151
163
|
quantized ? scalar_kind_t::i8_k : scalar_kind_t::f32_k;
|
|
152
164
|
|
|
153
165
|
// Special case: Jaccard with f32 (not bitsets)
|
|
154
166
|
if (metric_kind == metric_kind_t::jaccard_k && !quantized) {
|
|
155
|
-
metric_punned_t
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
_index = std::
|
|
167
|
+
metric_punned_t metric_j(dimensions,
|
|
168
|
+
reinterpret_cast<std::uintptr_t>(&jaccard_f32),
|
|
169
|
+
metric_punned_signature_t::array_array_size_k,
|
|
170
|
+
metric_kind_t::jaccard_k, scalar_kind_t::f32_k);
|
|
171
|
+
_index = std::make_shared<Index>(Index::make(metric_j));
|
|
160
172
|
} else {
|
|
161
173
|
metric_punned_t metric(dimensions, metric_kind, scalar_kind);
|
|
162
|
-
_index = std::
|
|
174
|
+
_index = std::make_shared<Index>(Index::make(metric));
|
|
163
175
|
}
|
|
164
176
|
|
|
165
177
|
LOGD("Initializing Index HostObject: dims=%d, quantized=%d, metric=%d",
|
|
@@ -181,23 +193,77 @@ public:
|
|
|
181
193
|
jsi::Value get(jsi::Runtime &runtime, const jsi::PropNameID &name) override {
|
|
182
194
|
std::string methodName = name.utf8(runtime);
|
|
183
195
|
|
|
184
|
-
if (methodName == "dimensions")
|
|
196
|
+
if (methodName == "dimensions") {
|
|
197
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
185
198
|
return jsi::Value((double)_index->dimensions());
|
|
199
|
+
}
|
|
186
200
|
if (methodName == "count")
|
|
201
|
+
{
|
|
202
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
187
203
|
return jsi::Value(_index ? (double)_index->size() : 0);
|
|
188
|
-
|
|
189
|
-
|
|
204
|
+
}
|
|
205
|
+
if (methodName == "memoryUsage") {
|
|
206
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
207
|
+
if (!_index)
|
|
208
|
+
return jsi::Value(0);
|
|
209
|
+
// We calculate memory usage manually to avoid a race condition in
|
|
210
|
+
// USearch's stats() when called during background indexing. This is also
|
|
211
|
+
// much faster and safer.
|
|
212
|
+
size_t count = _index->size();
|
|
213
|
+
size_t dims = _index->dimensions();
|
|
214
|
+
// Estimation:
|
|
215
|
+
// 1. Vector data (the largest part)
|
|
216
|
+
size_t vectorBytes = count * dims * (_quantized ? 1 : 4);
|
|
217
|
+
// 2. Graph overhead (nodes + neighbors). USearch node is ~64 bytes +
|
|
218
|
+
// connectivity * 4. We assume an average connectivity of 32 for the
|
|
219
|
+
// estimation.
|
|
220
|
+
size_t graphOverhead = count * (64 + 32 * 4);
|
|
221
|
+
// 3. Buffer base (metadata, threads, etc)
|
|
222
|
+
size_t baseMemory = 1024 * 1024; // 1MB base
|
|
223
|
+
return jsi::Value((double)(vectorBytes + graphOverhead + baseMemory));
|
|
224
|
+
}
|
|
190
225
|
if (methodName == "isa") {
|
|
226
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
191
227
|
const char *isa = _index ? _index->metric().isa_name() : "unknown";
|
|
192
|
-
LOGD("isa property accessed: %s", isa);
|
|
193
228
|
return jsi::String::createFromUtf8(runtime, isa);
|
|
194
229
|
}
|
|
230
|
+
if (methodName == "isIndexing") {
|
|
231
|
+
return jsi::Value(_isIndexing.load());
|
|
232
|
+
}
|
|
233
|
+
if (methodName == "indexingProgress") {
|
|
234
|
+
jsi::Object res(runtime);
|
|
235
|
+
size_t current = _currentIndexingCount.load();
|
|
236
|
+
size_t total = _totalIndexingCount.load();
|
|
237
|
+
double percentage = (total > 0) ? (double)current / total : 0;
|
|
238
|
+
res.setProperty(runtime, "current", (double)current);
|
|
239
|
+
res.setProperty(runtime, "total", (double)total);
|
|
240
|
+
res.setProperty(runtime, "percentage", percentage);
|
|
241
|
+
return res;
|
|
242
|
+
}
|
|
243
|
+
if (methodName == "getLastResult") {
|
|
244
|
+
return jsi::Function::createFromHostFunction(
|
|
245
|
+
runtime, name, 0,
|
|
246
|
+
[this](jsi::Runtime &runtime, const jsi::Value &thisValue,
|
|
247
|
+
const jsi::Value *arguments, size_t count) -> jsi::Value {
|
|
248
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
249
|
+
if (!_lastResult.error.empty()) {
|
|
250
|
+
std::string err = _lastResult.error;
|
|
251
|
+
_lastResult.error = ""; // Clear after reporting
|
|
252
|
+
throw jsi::JSError(runtime, err);
|
|
253
|
+
}
|
|
254
|
+
jsi::Object res(runtime);
|
|
255
|
+
res.setProperty(runtime, "duration", _lastResult.duration);
|
|
256
|
+
res.setProperty(runtime, "count", (double)_lastResult.count);
|
|
257
|
+
return res;
|
|
258
|
+
});
|
|
259
|
+
}
|
|
195
260
|
|
|
196
261
|
if (methodName == "delete") {
|
|
197
262
|
return jsi::Function::createFromHostFunction(
|
|
198
263
|
runtime, name, 0,
|
|
199
264
|
[this](jsi::Runtime &runtime, const jsi::Value &thisValue,
|
|
200
265
|
const jsi::Value *arguments, size_t count) -> jsi::Value {
|
|
266
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
201
267
|
_index.reset();
|
|
202
268
|
return jsi::Value::undefined();
|
|
203
269
|
});
|
|
@@ -211,15 +277,14 @@ public:
|
|
|
211
277
|
if (count < 2)
|
|
212
278
|
throw jsi::JSError(runtime,
|
|
213
279
|
"add expects 2 arguments: key, vector");
|
|
214
|
-
if (!_index)
|
|
215
|
-
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
216
280
|
|
|
217
281
|
default_key_t key =
|
|
218
282
|
static_cast<default_key_t>(arguments[0].asNumber());
|
|
219
283
|
auto [vecData, vecSize] = getRawVector(runtime, arguments[1]);
|
|
220
284
|
|
|
221
|
-
|
|
222
|
-
|
|
285
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
286
|
+
if (!_index)
|
|
287
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
223
288
|
|
|
224
289
|
if (vecSize != _index->dimensions()) {
|
|
225
290
|
LOGE("Dimension mismatch: expected %zu, got %zu",
|
|
@@ -234,17 +299,22 @@ public:
|
|
|
234
299
|
LOGD("Resizing index to: %zu", newCapacity);
|
|
235
300
|
_index->reserve(index_limits_t(newCapacity, _threads));
|
|
236
301
|
}
|
|
237
|
-
|
|
238
|
-
(unsigned long long)key, vecData, _index->dimensions());
|
|
302
|
+
auto start = std::chrono::high_resolution_clock::now();
|
|
239
303
|
auto result = _index->add(key, vecData);
|
|
240
|
-
|
|
304
|
+
auto end = std::chrono::high_resolution_clock::now();
|
|
305
|
+
|
|
241
306
|
if (!result) {
|
|
242
307
|
LOGE("Failed to add vector: %s", result.error.what());
|
|
243
308
|
throw jsi::JSError(runtime, "Error adding: " +
|
|
244
309
|
std::string(result.error.what()));
|
|
245
310
|
}
|
|
246
311
|
|
|
247
|
-
|
|
312
|
+
double durationMs =
|
|
313
|
+
std::chrono::duration<double, std::milli>(end - start).count();
|
|
314
|
+
|
|
315
|
+
jsi::Object res(runtime);
|
|
316
|
+
res.setProperty(runtime, "duration", durationMs);
|
|
317
|
+
return res;
|
|
248
318
|
});
|
|
249
319
|
}
|
|
250
320
|
|
|
@@ -253,12 +323,13 @@ public:
|
|
|
253
323
|
runtime, name, 2,
|
|
254
324
|
[this](jsi::Runtime &runtime, const jsi::Value &thisValue,
|
|
255
325
|
const jsi::Value *arguments, size_t count) -> jsi::Value {
|
|
256
|
-
LOGD("addBatch called");
|
|
257
326
|
if (count < 2)
|
|
258
327
|
throw jsi::JSError(runtime,
|
|
259
328
|
"addBatch expects 2 arguments: keys, vectors");
|
|
260
329
|
if (!_index)
|
|
261
330
|
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
331
|
+
if (_isIndexing)
|
|
332
|
+
throw jsi::JSError(runtime, "Index is already busy.");
|
|
262
333
|
|
|
263
334
|
jsi::Object keysArray = arguments[0].asObject(runtime);
|
|
264
335
|
auto keysBuffer = keysArray.getProperty(runtime, "buffer")
|
|
@@ -283,37 +354,62 @@ public:
|
|
|
283
354
|
size_t dims = _index->dimensions();
|
|
284
355
|
size_t batchCount = vecTotalElements / dims;
|
|
285
356
|
|
|
286
|
-
LOGD("addBatch processing: keysCount=%zu, batchCount=%zu, "
|
|
287
|
-
"dims=%zu",
|
|
288
|
-
keysCount, batchCount, dims);
|
|
289
|
-
|
|
290
357
|
if (batchCount != keysCount)
|
|
291
358
|
throw jsi::JSError(runtime, "Batch mismatch: keys and vectors "
|
|
292
359
|
"must have compatible sizes.");
|
|
293
360
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
newCapacity = _index->size() + batchCount + 100;
|
|
301
|
-
|
|
302
|
-
LOGD("Resizing index for batch to: %zu", newCapacity);
|
|
303
|
-
_index->reserve(index_limits_t(newCapacity, _threads));
|
|
361
|
+
{
|
|
362
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
363
|
+
if (_index->size() + batchCount > _index->capacity()) {
|
|
364
|
+
size_t newCapacity = _index->size() + batchCount + 100;
|
|
365
|
+
_index->reserve(index_limits_t(newCapacity, _threads));
|
|
366
|
+
}
|
|
304
367
|
}
|
|
305
368
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
369
|
+
// Copy data safely for background thread
|
|
370
|
+
std::vector<int32_t> keys(keysData, keysData + batchCount);
|
|
371
|
+
std::vector<float> vectors(vecData, vecData + (batchCount * dims));
|
|
372
|
+
|
|
373
|
+
_isIndexing = true;
|
|
374
|
+
_currentIndexingCount = 0;
|
|
375
|
+
_totalIndexingCount = batchCount;
|
|
376
|
+
|
|
377
|
+
// Capture self to keep HostObject alive during background thread
|
|
378
|
+
std::thread([self = shared_from_this(), keys = std::move(keys),
|
|
379
|
+
vectors = std::move(vectors), batchCount,
|
|
380
|
+
dims]() mutable {
|
|
381
|
+
auto start = std::chrono::high_resolution_clock::now();
|
|
382
|
+
try {
|
|
383
|
+
for (size_t i = 0; i < batchCount; ++i) {
|
|
384
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
385
|
+
if (!self->_index)
|
|
386
|
+
break; // Safety check
|
|
387
|
+
auto result = self->_index->add((default_key_t)keys[i],
|
|
388
|
+
vectors.data() + (i * dims));
|
|
389
|
+
if (!result) {
|
|
390
|
+
self->_lastResult.error =
|
|
391
|
+
"Error adding at index " + std::to_string(i);
|
|
392
|
+
self->_isIndexing = false;
|
|
393
|
+
return;
|
|
394
|
+
}
|
|
395
|
+
self->_currentIndexingCount++;
|
|
396
|
+
}
|
|
397
|
+
auto end = std::chrono::high_resolution_clock::now();
|
|
398
|
+
{
|
|
399
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
400
|
+
self->_lastResult.duration =
|
|
401
|
+
std::chrono::duration<double, std::milli>(end - start)
|
|
402
|
+
.count();
|
|
403
|
+
self->_lastResult.count = batchCount;
|
|
404
|
+
self->_lastResult.error = "";
|
|
405
|
+
}
|
|
406
|
+
} catch (const std::exception &e) {
|
|
407
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
408
|
+
self->_lastResult.error = e.what();
|
|
314
409
|
}
|
|
315
|
-
|
|
316
|
-
|
|
410
|
+
self->_isIndexing = false;
|
|
411
|
+
}).detach();
|
|
412
|
+
|
|
317
413
|
return jsi::Value::undefined();
|
|
318
414
|
});
|
|
319
415
|
}
|
|
@@ -325,12 +421,14 @@ public:
|
|
|
325
421
|
const jsi::Value *arguments, size_t count) -> jsi::Value {
|
|
326
422
|
if (count < 1)
|
|
327
423
|
throw jsi::JSError(runtime, "remove expects 1 argument: key");
|
|
328
|
-
if (!_index)
|
|
329
|
-
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
330
424
|
|
|
331
425
|
default_key_t key =
|
|
332
426
|
static_cast<default_key_t>(arguments[0].asNumber());
|
|
333
427
|
|
|
428
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
429
|
+
if (!_index)
|
|
430
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
431
|
+
|
|
334
432
|
auto result = _index->remove(key);
|
|
335
433
|
if (!result) {
|
|
336
434
|
LOGE("Failed to remove vector: %s", result.error.what());
|
|
@@ -350,13 +448,15 @@ public:
|
|
|
350
448
|
if (count < 2)
|
|
351
449
|
throw jsi::JSError(runtime,
|
|
352
450
|
"update expects 2 arguments: key, vector");
|
|
353
|
-
if (!_index)
|
|
354
|
-
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
355
451
|
|
|
356
452
|
default_key_t key =
|
|
357
453
|
static_cast<default_key_t>(arguments[0].asNumber());
|
|
358
454
|
auto [vecData, vecSize] = getRawVector(runtime, arguments[1]);
|
|
359
455
|
|
|
456
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
457
|
+
if (!_index)
|
|
458
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
459
|
+
|
|
360
460
|
if (vecSize != _index->dimensions()) {
|
|
361
461
|
throw jsi::JSError(runtime, "Incorrect dimension for update.");
|
|
362
462
|
}
|
|
@@ -385,8 +485,6 @@ public:
|
|
|
385
485
|
if (count < 2)
|
|
386
486
|
throw jsi::JSError(runtime,
|
|
387
487
|
"search expects 2 arguments: vector, count");
|
|
388
|
-
if (!_index)
|
|
389
|
-
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
390
488
|
|
|
391
489
|
LOGD("search: starting...");
|
|
392
490
|
auto [queryData, querySize] = getRawVector(runtime, arguments[0]);
|
|
@@ -416,6 +514,10 @@ public:
|
|
|
416
514
|
}
|
|
417
515
|
}
|
|
418
516
|
|
|
517
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
518
|
+
if (!_index)
|
|
519
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
520
|
+
|
|
419
521
|
if (querySize != _index->dimensions()) {
|
|
420
522
|
LOGE("Search dimension mismatch: expected %zu, got %zu",
|
|
421
523
|
_index->dimensions(), querySize);
|
|
@@ -457,6 +559,11 @@ public:
|
|
|
457
559
|
|
|
458
560
|
default_key_t key =
|
|
459
561
|
static_cast<default_key_t>(arguments[0].asNumber());
|
|
562
|
+
|
|
563
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
564
|
+
if (!_index)
|
|
565
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
566
|
+
|
|
460
567
|
size_t dims = _index->dimensions();
|
|
461
568
|
|
|
462
569
|
jsi::ArrayBuffer buffer =
|
|
@@ -466,24 +573,15 @@ public:
|
|
|
466
573
|
.getObject(runtime)
|
|
467
574
|
.getArrayBuffer(runtime);
|
|
468
575
|
|
|
469
|
-
// We need raw access to write to it
|
|
470
|
-
// JSI ArrayBuffer doesn't give direct mutable pointer easily
|
|
471
|
-
// without a TypedArray view? Actually getArrayBuffer ->
|
|
472
|
-
// data(runtime) gives pointer.
|
|
473
|
-
|
|
474
576
|
uint8_t *data = buffer.data(runtime);
|
|
475
577
|
float *vecData = reinterpret_cast<float *>(data);
|
|
476
578
|
|
|
477
|
-
// USearch get() signature: bool get(key_t key, scalar_t* vector)
|
|
478
|
-
// const
|
|
479
579
|
bool found = _index->get(key, vecData);
|
|
480
580
|
|
|
481
581
|
if (!found) {
|
|
482
582
|
return jsi::Value::undefined();
|
|
483
583
|
}
|
|
484
584
|
|
|
485
|
-
// Return Float32Array view
|
|
486
|
-
// Float32Array constructor: new Float32Array(buffer)
|
|
487
585
|
jsi::Object float32ArrayCtor =
|
|
488
586
|
runtime.global().getPropertyAsObject(runtime, "Float32Array");
|
|
489
587
|
jsi::Object float32Array = float32ArrayCtor.asFunction(runtime)
|
|
@@ -503,6 +601,9 @@ public:
|
|
|
503
601
|
throw jsi::JSError(runtime, "save expects path");
|
|
504
602
|
std::string path = normalizePath(
|
|
505
603
|
runtime, arguments[0].asString(runtime).utf8(runtime));
|
|
604
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
605
|
+
if (!_index)
|
|
606
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
506
607
|
if (!_index->save(path.c_str()))
|
|
507
608
|
throw jsi::JSError(
|
|
508
609
|
runtime, "Critical error saving index to disk: " + path);
|
|
@@ -517,6 +618,9 @@ public:
|
|
|
517
618
|
const jsi::Value *arguments, size_t count) -> jsi::Value {
|
|
518
619
|
if (count < 1 || !arguments[0].isString())
|
|
519
620
|
throw jsi::JSError(runtime, "loadVectorsFromFile expects path");
|
|
621
|
+
if (_isIndexing)
|
|
622
|
+
throw jsi::JSError(runtime, "Index is already busy.");
|
|
623
|
+
|
|
520
624
|
std::string path = normalizePath(
|
|
521
625
|
runtime, arguments[0].asString(runtime).utf8(runtime));
|
|
522
626
|
|
|
@@ -525,39 +629,60 @@ public:
|
|
|
525
629
|
throw jsi::JSError(runtime, "Could not open file: " + path);
|
|
526
630
|
|
|
527
631
|
std::streamsize size = file.tellg();
|
|
528
|
-
file.seekg(0, std::ios::beg);
|
|
529
|
-
|
|
530
632
|
if (size <= 0)
|
|
531
633
|
return jsi::Value::undefined();
|
|
532
634
|
|
|
533
635
|
size_t dims = _index->dimensions();
|
|
534
|
-
if (size % (dims * sizeof(float)) != 0) {
|
|
535
|
-
throw jsi::JSError(runtime,
|
|
536
|
-
"File size is not multiple of dimension");
|
|
537
|
-
}
|
|
538
|
-
|
|
539
636
|
size_t numVectors = size / (dims * sizeof(float));
|
|
540
|
-
std::vector<char> buffer(size);
|
|
541
|
-
if (!file.read(buffer.data(), size))
|
|
542
|
-
throw jsi::JSError(runtime, "Failed to read file");
|
|
543
|
-
|
|
544
|
-
const float *vectorData =
|
|
545
|
-
reinterpret_cast<const float *>(buffer.data());
|
|
546
|
-
|
|
547
|
-
// Reserve capacity
|
|
548
|
-
if (_index->size() + numVectors > _index->capacity()) {
|
|
549
|
-
size_t newCap = _index->size() + numVectors + 100;
|
|
550
|
-
LOGD("Resizing index for large binary load: %zu", newCap);
|
|
551
|
-
_index->reserve(newCap);
|
|
552
|
-
}
|
|
553
637
|
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
638
|
+
_isIndexing = true;
|
|
639
|
+
_currentIndexingCount = 0;
|
|
640
|
+
_totalIndexingCount = numVectors;
|
|
641
|
+
|
|
642
|
+
// Capture self to keep HostObject alive during background thread
|
|
643
|
+
std::thread([self = shared_from_this(), path, numVectors, dims]() {
|
|
644
|
+
auto start = std::chrono::high_resolution_clock::now();
|
|
645
|
+
try {
|
|
646
|
+
std::ifstream file(path, std::ios::binary);
|
|
647
|
+
std::vector<float> vectorData(numVectors * dims);
|
|
648
|
+
file.read(reinterpret_cast<char *>(vectorData.data()),
|
|
649
|
+
numVectors * dims * sizeof(float));
|
|
650
|
+
|
|
651
|
+
{
|
|
652
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
653
|
+
if (!self->_index)
|
|
654
|
+
return;
|
|
655
|
+
if (self->_index->size() + numVectors >
|
|
656
|
+
self->_index->capacity()) {
|
|
657
|
+
self->_index->reserve(self->_index->size() + numVectors +
|
|
658
|
+
100);
|
|
659
|
+
}
|
|
660
|
+
}
|
|
558
661
|
|
|
559
|
-
|
|
560
|
-
|
|
662
|
+
for (size_t i = 0; i < numVectors; ++i) {
|
|
663
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
664
|
+
self->_index->add((default_key_t)i,
|
|
665
|
+
vectorData.data() + (i * dims));
|
|
666
|
+
self->_currentIndexingCount++;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
auto end = std::chrono::high_resolution_clock::now();
|
|
670
|
+
{
|
|
671
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
672
|
+
self->_lastResult.duration =
|
|
673
|
+
std::chrono::duration<double, std::milli>(end - start)
|
|
674
|
+
.count();
|
|
675
|
+
self->_lastResult.count = numVectors;
|
|
676
|
+
self->_lastResult.error = "";
|
|
677
|
+
}
|
|
678
|
+
} catch (const std::exception &e) {
|
|
679
|
+
std::lock_guard<std::mutex> lock(self->_mutex);
|
|
680
|
+
self->_lastResult.error = e.what();
|
|
681
|
+
}
|
|
682
|
+
self->_isIndexing = false;
|
|
683
|
+
}).detach();
|
|
684
|
+
|
|
685
|
+
return jsi::Value::undefined();
|
|
561
686
|
});
|
|
562
687
|
}
|
|
563
688
|
|
|
@@ -570,6 +695,9 @@ public:
|
|
|
570
695
|
throw jsi::JSError(runtime, "load expects path");
|
|
571
696
|
std::string path = normalizePath(
|
|
572
697
|
runtime, arguments[0].asString(runtime).utf8(runtime));
|
|
698
|
+
std::lock_guard<std::mutex> lock(_mutex);
|
|
699
|
+
if (!_index)
|
|
700
|
+
throw jsi::JSError(runtime, "VectorIndex has been deleted.");
|
|
573
701
|
if (!_index->load(path.c_str()))
|
|
574
702
|
throw jsi::JSError(
|
|
575
703
|
runtime, "Critical error loading index from disk: " + path);
|
|
@@ -581,7 +709,13 @@ public:
|
|
|
581
709
|
}
|
|
582
710
|
|
|
583
711
|
private:
|
|
584
|
-
std::
|
|
712
|
+
std::shared_ptr<Index> _index;
|
|
713
|
+
mutable std::mutex _mutex;
|
|
714
|
+
std::atomic<bool> _isIndexing{false};
|
|
715
|
+
std::atomic<size_t> _currentIndexingCount{0};
|
|
716
|
+
std::atomic<size_t> _totalIndexingCount{0};
|
|
717
|
+
bool _quantized;
|
|
718
|
+
OperationResult _lastResult;
|
|
585
719
|
};
|
|
586
720
|
|
|
587
721
|
inline void install(jsi::Runtime &rt) {
|
package/package.json
CHANGED
|
@@ -16,13 +16,35 @@ export interface SearchOptions {
|
|
|
16
16
|
allowedKeys?: number[] | Int32Array | Uint32Array;
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
/** Outcome of a single synchronous `add` call. */
export type AddResult = {
  /** Time spent inserting the vector, in milliseconds. */
  duration: number;
};
|
|
22
|
+
|
|
23
|
+
/** Outcome of an asynchronous batch insertion. */
export type VectorAddBatchResult = {
  /** Duration of the batch operation, in milliseconds. */
  duration: number;
  /** Number of vectors reported for the batch. */
  count: number;
};
|
|
27
|
+
|
|
28
|
+
/** Outcome of loading raw vectors from a binary file. */
export type VectorLoadResult = {
  /** Duration of the load operation, in milliseconds. */
  duration: number;
  /** Number of vectors reported for the load. */
  count: number;
};
|
|
32
|
+
|
|
33
|
+
/** Snapshot of an ongoing background indexing operation. */
export type IndexingProgress = {
  /** Number of vectors processed so far. */
  current: number;
  /** Total number of vectors in the operation. */
  total: number;
  /**
   * Ratio `current / total` as computed by the native side (0 when `total`
   * is 0). This is a fraction, not a value multiplied by 100.
   */
  percentage: number;
};
|
|
38
|
+
|
|
19
39
|
// C++ HostObject Interface (Index Instance)
|
|
20
40
|
interface VectorIndexHostObject {
|
|
21
41
|
dimensions: number;
|
|
22
42
|
count: number;
|
|
23
43
|
memoryUsage: number;
|
|
24
44
|
isa: string;
|
|
25
|
-
|
|
45
|
+
isIndexing: boolean;
|
|
46
|
+
indexingProgress: IndexingProgress;
|
|
47
|
+
add(key: number, vector: Vector): AddResult;
|
|
26
48
|
remove(key: number): void;
|
|
27
49
|
update(key: number, vector: Vector): void;
|
|
28
50
|
search(
|
|
@@ -34,8 +56,9 @@ interface VectorIndexHostObject {
|
|
|
34
56
|
load(path: string): void;
|
|
35
57
|
delete(): void;
|
|
36
58
|
addBatch(keys: Int32Array, vectors: Float32Array): void;
|
|
37
|
-
loadVectorsFromFile(path: string):
|
|
59
|
+
loadVectorsFromFile(path: string): void;
|
|
38
60
|
getItemVector(key: number): Float32Array | undefined;
|
|
61
|
+
getLastResult(): VectorLoadResult;
|
|
39
62
|
}
|
|
40
63
|
|
|
41
64
|
// Global Module Interface (Factory)
|
|
@@ -96,25 +119,52 @@ export class VectorIndex {
|
|
|
96
119
|
return this._index.isa;
|
|
97
120
|
}
|
|
98
121
|
|
|
122
|
+
/**
 * `true` while the native index is running a background operation
 * (for example one started by `addBatch`).
 */
get isIndexing(): boolean {
  const busy = this._index.isIndexing;
  return busy;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Current progress of the background indexing operation, read from the
 * native index at the time of access.
 */
get indexingProgress(): IndexingProgress {
  const snapshot = this._index.indexingProgress;
  return snapshot;
}
|
|
135
|
+
|
|
99
136
|
/**
 * Inserts one vector into the index.
 * @param key A unique numeric identifier for the vector.
 * @param vector A Float32Array containing the vector data.
 * @returns The result reported by the native index, including the
 *   insertion duration in milliseconds.
 * @throws Error if the vector dimension doesn't match or memory allocation fails.
 */
add(key: number, vector: Vector): AddResult {
  const outcome = this._index.add(key, vector);
  return outcome;
}
|
|
108
145
|
|
|
109
146
|
/**
 * Adds multiple vectors in one batch operation. The native call starts the
 * insertion and this method then waits until the index stops reporting
 * `isIndexing`.
 * @param keys An Int32Array of unique numeric identifiers.
 * @param vectors A single Float32Array containing all vectors concatenated.
 * @returns A promise resolving to the duration and count reported by the
 *   native index once the batch has finished.
 * @throws Error (as a rejected promise) if buffer sizes or alignment do not
 *   match, or if the native index reports a failure for the batch.
 */
async addBatch(
  keys: Int32Array,
  vectors: Float32Array
): Promise<VectorAddBatchResult> {
  // Kick off the native batch first; completion is observed by polling.
  this._index.addBatch(keys, vectors);
  const completion = this._waitForOperation();
  return completion;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Internal helper: polls the native `isIndexing` flag every 50 ms and, once
 * it is cleared, returns whatever `getLastResult()` reports (which throws if
 * the native side recorded an error).
 */
private async _waitForOperation(): Promise<VectorLoadResult> {
  const pollIntervalMs = 50;
  for (;;) {
    if (!this._index.isIndexing) {
      return this._index.getLastResult();
    }
    await new Promise<void>((done) => setTimeout(done, pollIntervalMs));
  }
}
|
|
119
169
|
|
|
120
170
|
/**
|
|
@@ -173,10 +223,11 @@ export class VectorIndex {
|
|
|
173
223
|
* Loads raw vectors directly from a binary file.
|
|
174
224
|
* This avoids JS parsing overhead and is much faster for initialization.
|
|
175
225
|
* @param path The absolute path to the binary file containing packed floats.
|
|
176
|
-
* @returns
|
|
226
|
+
* @returns An object containing the number of vectors loaded and the duration.
|
|
177
227
|
*/
|
|
178
|
-
loadVectorsFromFile(path: string):
|
|
179
|
-
|
|
228
|
+
async loadVectorsFromFile(path: string): Promise<VectorLoadResult> {
  // Start the native load, then wait for the index to finish indexing.
  this._index.loadVectorsFromFile(path);
  const completion = this._waitForOperation();
  return completion;
}
|
|
181
232
|
|
|
182
233
|
/**
|