bm25-turbo-wasm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -0
- package/bm25_turbo_wasm.d.ts +61 -0
- package/bm25_turbo_wasm.js +9 -0
- package/bm25_turbo_wasm_bg.js +353 -0
- package/bm25_turbo_wasm_bg.wasm +0 -0
- package/package.json +23 -0
package/README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# bm25-turbo
|
|
2
|
+
|
|
3
|
+
The fastest BM25 full-text search engine, compiled to WebAssembly.
|
|
4
|
+
|
|
5
|
+
BM25 Turbo is a Rust-native BM25 information retrieval engine that supports 5 scoring variants (Robertson, Lucene, ATIRE, BM25L, BM25+), 17-language tokenization with Snowball stemming, and compressed sparse column storage. This package brings the full engine to the browser and Node.js via WebAssembly.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install bm25-turbo-wasm
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```javascript
|
|
16
|
+
import { init, WasmBM25 } from 'bm25-turbo-wasm';
|
|
17
|
+
|
|
18
|
+
// init() installs the panic hook; it is also called automatically when the module is instantiated
|
|
19
|
+
await init();
|
|
20
|
+
|
|
21
|
+
// Build an index from an array of documents
|
|
22
|
+
const index = new WasmBM25([
|
|
23
|
+
"The quick brown fox jumps over the lazy dog",
|
|
24
|
+
"A fast red car drives on the highway",
|
|
25
|
+
"The brown dog sleeps in the sun",
|
|
26
|
+
"Quick foxes are surprisingly lazy animals",
|
|
27
|
+
]);
|
|
28
|
+
|
|
29
|
+
// Search for the top 2 results
|
|
30
|
+
const results = index.search("quick brown fox", 2);
|
|
31
|
+
console.log(results);
|
|
32
|
+
// { doc_ids: [0, 3], scores: [1.82, 0.94] }
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
### Creating an Index
|
|
38
|
+
|
|
39
|
+
The `WasmBM25` constructor accepts an array of document strings and optional parameters:
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
const index = new WasmBM25(
|
|
43
|
+
documents, // string[] — array of document texts
|
|
44
|
+
method?, // string — scoring variant (default: "lucene")
|
|
45
|
+
k1?, // number — term frequency saturation (default: 1.5)
|
|
46
|
+
b?, // number — document length normalization (default: 0.75)
|
|
47
|
+
);
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Supported methods:** `"robertson"`, `"lucene"`, `"atire"`, `"bm25l"`, `"bm25plus"`
|
|
51
|
+
|
|
52
|
+
### Searching
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
const results = index.search(query, k);
|
|
56
|
+
// Returns: { doc_ids: number[], scores: number[] }
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
- `query` — search query string
|
|
60
|
+
- `k` — maximum number of results to return (must be > 0)
|
|
61
|
+
|
|
62
|
+
### Serialization
|
|
63
|
+
|
|
64
|
+
Save an index to bytes for storage (e.g., IndexedDB, or localStorage after encoding the bytes as a string, since localStorage only stores strings):
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
// Serialize
|
|
68
|
+
const bytes = index.to_bytes(); // Uint8Array
|
|
69
|
+
|
|
70
|
+
// Deserialize
|
|
71
|
+
const restored = WasmBM25.loadBytes(bytes);
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Index Statistics
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
console.log(index.num_docs()); // number of documents
|
|
78
|
+
console.log(index.vocab_size()); // number of unique terms
|
|
79
|
+
console.log(index.stats()); // JSON string with full stats
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## API Reference
|
|
83
|
+
|
|
84
|
+
### `new WasmBM25(documents, method?, k1?, b?)`
|
|
85
|
+
|
|
86
|
+
Construct a BM25 index from a corpus of documents.
|
|
87
|
+
|
|
88
|
+
| Parameter | Type | Default | Description |
|
|
89
|
+
|-----------|------|---------|-------------|
|
|
90
|
+
| `documents` | `string[]` | (required) | Array of document texts to index |
|
|
91
|
+
| `method` | `string` | `"lucene"` | BM25 scoring variant |
|
|
92
|
+
| `k1` | `number` | `1.5` | Term frequency saturation parameter |
|
|
93
|
+
| `b` | `number` | `0.75` | Document length normalization parameter |
|
|
94
|
+
|
|
95
|
+
### `.search(query, k)`
|
|
96
|
+
|
|
97
|
+
Search the index and return the top-k results.
|
|
98
|
+
|
|
99
|
+
| Parameter | Type | Description |
|
|
100
|
+
|-----------|------|-------------|
|
|
101
|
+
| `query` | `string` | Search query text |
|
|
102
|
+
| `k` | `number` | Maximum number of results (must be > 0) |
|
|
103
|
+
|
|
104
|
+
**Returns:** `{ doc_ids: number[], scores: number[] }`
|
|
105
|
+
|
|
106
|
+
### `.num_docs()`
|
|
107
|
+
|
|
108
|
+
Returns the number of documents in the index.
|
|
109
|
+
|
|
110
|
+
### `.vocab_size()`
|
|
111
|
+
|
|
112
|
+
Returns the number of unique terms in the vocabulary.
|
|
113
|
+
|
|
114
|
+
### `.stats()`
|
|
115
|
+
|
|
116
|
+
Returns a JSON string with index statistics including `num_docs`, `vocab_size`, `method`, `k1`, `b`, `delta`, and `avg_doc_len`.
|
|
117
|
+
|
|
118
|
+
### `.to_bytes()`
|
|
119
|
+
|
|
120
|
+
Serializes the index to a `Uint8Array` for storage or transfer.
|
|
121
|
+
|
|
122
|
+
### `WasmBM25.loadBytes(data)`
|
|
123
|
+
|
|
124
|
+
Static method. Deserializes an index from bytes produced by `to_bytes()`.
|
|
125
|
+
|
|
126
|
+
| Parameter | Type | Description |
|
|
127
|
+
|-----------|------|-------------|
|
|
128
|
+
| `data` | `Uint8Array` | Serialized index bytes |
|
|
129
|
+
|
|
130
|
+
**Returns:** `WasmBM25`
|
|
131
|
+
|
|
132
|
+
## Browser Support
|
|
133
|
+
|
|
134
|
+
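Requires a runtime with WebAssembly support (all modern browsers). The published build is an ES module that imports the `.wasm` file directly (wasm-pack `bundler`-style output), so it is intended to be consumed through a bundler or runtime that supports WebAssembly module imports rather than loaded as-is from a plain `<script type="module">`.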
|
|
135
|
+
|
|
136
|
+
## License
|
|
137
|
+
|
|
138
|
+
MIT OR Apache-2.0
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* BM25 search engine for WebAssembly.
|
|
6
|
+
*
|
|
7
|
+
* Provides index construction, search, and serialization capabilities
|
|
8
|
+
* entirely in WASM linear memory. No filesystem access, no threading.
|
|
9
|
+
*/
|
|
10
|
+
export class WasmBM25 {
|
|
11
|
+
free(): void;
|
|
12
|
+
[Symbol.dispose](): void;
|
|
13
|
+
/**
|
|
14
|
+
* Load a pre-built index from bytes.
|
|
15
|
+
*
|
|
16
|
+
* Accepts bytes produced by `to_bytes()` or by the native Rust library's
|
|
17
|
+
* bincode serialization of the same format.
|
|
18
|
+
*/
|
|
19
|
+
static loadBytes(data: Uint8Array): WasmBM25;
|
|
20
|
+
/**
|
|
21
|
+
* Build a BM25 index from an array of document strings.
|
|
22
|
+
*
|
|
23
|
+
* # Arguments
|
|
24
|
+
* * `documents` - JavaScript array of strings
|
|
25
|
+
* * `method` - BM25 variant: "robertson", "lucene", "atire", "bm25l", "bm25plus"
|
|
26
|
+
* * `k1` - Term frequency saturation parameter (default: 1.5)
|
|
27
|
+
* * `b` - Document length normalization parameter (default: 0.75)
|
|
28
|
+
*/
|
|
29
|
+
constructor(documents: string[], method?: string | null, k1?: number | null, b?: number | null);
|
|
30
|
+
/**
|
|
31
|
+
* Return the number of documents in the index.
|
|
32
|
+
*/
|
|
33
|
+
num_docs(): number;
|
|
34
|
+
/**
|
|
35
|
+
* Search the index and return the top-k results.
|
|
36
|
+
*
|
|
37
|
+
* Returns a JavaScript object with `doc_ids` and `scores` arrays of numbers (the README documents both as `number[]`).
|
|
38
|
+
*/
|
|
39
|
+
search(query: string, k: number): any;
|
|
40
|
+
/**
|
|
41
|
+
* Return index statistics as a JSON string.
|
|
42
|
+
*/
|
|
43
|
+
stats(): string;
|
|
44
|
+
/**
|
|
45
|
+
* Serialize the index to bytes for storage/transfer.
|
|
46
|
+
*
|
|
47
|
+
* The returned bytes can be loaded back via `WasmBM25.loadBytes()`.
|
|
48
|
+
*/
|
|
49
|
+
to_bytes(): Uint8Array;
|
|
50
|
+
/**
|
|
51
|
+
* Return the vocabulary size.
|
|
52
|
+
*/
|
|
53
|
+
vocab_size(): number;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Called automatically when the WASM module is instantiated.
|
|
58
|
+
* Sets up `console_error_panic_hook` so that Rust panics produce
|
|
59
|
+
* readable stack traces in the browser console.
|
|
60
|
+
*/
|
|
61
|
+
export function init(): void;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/* @ts-self-types="./bm25_turbo_wasm.d.ts" */
|
|
2
|
+
|
|
3
|
+
import * as wasm from "./bm25_turbo_wasm_bg.wasm";
|
|
4
|
+
import { __wbg_set_wasm } from "./bm25_turbo_wasm_bg.js";
|
|
5
|
+
__wbg_set_wasm(wasm);
|
|
6
|
+
wasm.__wbindgen_start();
|
|
7
|
+
export {
|
|
8
|
+
WasmBM25, init
|
|
9
|
+
} from "./bm25_turbo_wasm_bg.js";
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 search engine for WebAssembly.
|
|
3
|
+
*
|
|
4
|
+
* Provides index construction, search, and serialization capabilities
|
|
5
|
+
* entirely in WASM linear memory. No filesystem access, no threading.
|
|
6
|
+
*/
|
|
7
|
+
export class WasmBM25 {
|
|
8
|
+
static __wrap(ptr) {
|
|
9
|
+
ptr = ptr >>> 0;
|
|
10
|
+
const obj = Object.create(WasmBM25.prototype);
|
|
11
|
+
obj.__wbg_ptr = ptr;
|
|
12
|
+
WasmBM25Finalization.register(obj, obj.__wbg_ptr, obj);
|
|
13
|
+
return obj;
|
|
14
|
+
}
|
|
15
|
+
__destroy_into_raw() {
|
|
16
|
+
const ptr = this.__wbg_ptr;
|
|
17
|
+
this.__wbg_ptr = 0;
|
|
18
|
+
WasmBM25Finalization.unregister(this);
|
|
19
|
+
return ptr;
|
|
20
|
+
}
|
|
21
|
+
free() {
|
|
22
|
+
const ptr = this.__destroy_into_raw();
|
|
23
|
+
wasm.__wbg_wasmbm25_free(ptr, 0);
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Load a pre-built index from bytes.
|
|
27
|
+
*
|
|
28
|
+
* Accepts bytes produced by `to_bytes()` or by the native Rust library's
|
|
29
|
+
* bincode serialization of the same format.
|
|
30
|
+
* @param {Uint8Array} data
|
|
31
|
+
* @returns {WasmBM25}
|
|
32
|
+
*/
|
|
33
|
+
static loadBytes(data) {
|
|
34
|
+
const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_malloc);
|
|
35
|
+
const len0 = WASM_VECTOR_LEN;
|
|
36
|
+
const ret = wasm.wasmbm25_loadBytes(ptr0, len0);
|
|
37
|
+
if (ret[2]) {
|
|
38
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
39
|
+
}
|
|
40
|
+
return WasmBM25.__wrap(ret[0]);
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Build a BM25 index from an array of document strings.
|
|
44
|
+
*
|
|
45
|
+
* # Arguments
|
|
46
|
+
* * `documents` - JavaScript array of strings
|
|
47
|
+
* * `method` - BM25 variant: "robertson", "lucene", "atire", "bm25l", "bm25plus"
|
|
48
|
+
* * `k1` - Term frequency saturation parameter (default: 1.5)
|
|
49
|
+
* * `b` - Document length normalization parameter (default: 0.75)
|
|
50
|
+
* @param {string[]} documents
|
|
51
|
+
* @param {string | null} [method]
|
|
52
|
+
* @param {number | null} [k1]
|
|
53
|
+
* @param {number | null} [b]
|
|
54
|
+
*/
|
|
55
|
+
constructor(documents, method, k1, b) {
|
|
56
|
+
const ptr0 = passArrayJsValueToWasm0(documents, wasm.__wbindgen_malloc);
|
|
57
|
+
const len0 = WASM_VECTOR_LEN;
|
|
58
|
+
var ptr1 = isLikeNone(method) ? 0 : passStringToWasm0(method, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
59
|
+
var len1 = WASM_VECTOR_LEN;
|
|
60
|
+
const ret = wasm.wasmbm25_new(ptr0, len0, ptr1, len1, isLikeNone(k1) ? 0x100000001 : Math.fround(k1), isLikeNone(b) ? 0x100000001 : Math.fround(b));
|
|
61
|
+
if (ret[2]) {
|
|
62
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
63
|
+
}
|
|
64
|
+
this.__wbg_ptr = ret[0] >>> 0;
|
|
65
|
+
WasmBM25Finalization.register(this, this.__wbg_ptr, this);
|
|
66
|
+
return this;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Return the number of documents in the index.
|
|
70
|
+
* @returns {number}
|
|
71
|
+
*/
|
|
72
|
+
num_docs() {
|
|
73
|
+
const ret = wasm.wasmbm25_num_docs(this.__wbg_ptr);
|
|
74
|
+
return ret >>> 0;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Search the index and return the top-k results.
|
|
78
|
+
*
|
|
79
|
+
* Returns a JavaScript object with `doc_ids` and `scores` arrays of numbers (the README documents both as `number[]`).
|
|
80
|
+
* @param {string} query
|
|
81
|
+
* @param {number} k
|
|
82
|
+
* @returns {any}
|
|
83
|
+
*/
|
|
84
|
+
search(query, k) {
|
|
85
|
+
const ptr0 = passStringToWasm0(query, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
86
|
+
const len0 = WASM_VECTOR_LEN;
|
|
87
|
+
const ret = wasm.wasmbm25_search(this.__wbg_ptr, ptr0, len0, k);
|
|
88
|
+
if (ret[2]) {
|
|
89
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
90
|
+
}
|
|
91
|
+
return takeFromExternrefTable0(ret[0]);
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Return index statistics as a JSON string.
|
|
95
|
+
* @returns {string}
|
|
96
|
+
*/
|
|
97
|
+
stats() {
|
|
98
|
+
let deferred1_0;
|
|
99
|
+
let deferred1_1;
|
|
100
|
+
try {
|
|
101
|
+
const ret = wasm.wasmbm25_stats(this.__wbg_ptr);
|
|
102
|
+
deferred1_0 = ret[0];
|
|
103
|
+
deferred1_1 = ret[1];
|
|
104
|
+
return getStringFromWasm0(ret[0], ret[1]);
|
|
105
|
+
} finally {
|
|
106
|
+
wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Serialize the index to bytes for storage/transfer.
|
|
111
|
+
*
|
|
112
|
+
* The returned bytes can be loaded back via `WasmBM25.loadBytes()`.
|
|
113
|
+
* @returns {Uint8Array}
|
|
114
|
+
*/
|
|
115
|
+
to_bytes() {
|
|
116
|
+
const ret = wasm.wasmbm25_to_bytes(this.__wbg_ptr);
|
|
117
|
+
if (ret[3]) {
|
|
118
|
+
throw takeFromExternrefTable0(ret[2]);
|
|
119
|
+
}
|
|
120
|
+
var v1 = getArrayU8FromWasm0(ret[0], ret[1]).slice();
|
|
121
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
122
|
+
return v1;
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Return the vocabulary size.
|
|
126
|
+
* @returns {number}
|
|
127
|
+
*/
|
|
128
|
+
vocab_size() {
|
|
129
|
+
const ret = wasm.wasmbm25_vocab_size(this.__wbg_ptr);
|
|
130
|
+
return ret >>> 0;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
if (Symbol.dispose) WasmBM25.prototype[Symbol.dispose] = WasmBM25.prototype.free;
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Called automatically when the WASM module is instantiated.
|
|
137
|
+
* Sets up `console_error_panic_hook` so that Rust panics produce
|
|
138
|
+
* readable stack traces in the browser console.
|
|
139
|
+
*/
|
|
140
|
+
export function init() {
|
|
141
|
+
wasm.init();
|
|
142
|
+
}
|
|
143
|
+
export function __wbg_Error_83742b46f01ce22d(arg0, arg1) {
|
|
144
|
+
const ret = Error(getStringFromWasm0(arg0, arg1));
|
|
145
|
+
return ret;
|
|
146
|
+
}
|
|
147
|
+
export function __wbg_String_8564e559799eccda(arg0, arg1) {
|
|
148
|
+
const ret = String(arg1);
|
|
149
|
+
const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
150
|
+
const len1 = WASM_VECTOR_LEN;
|
|
151
|
+
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
|
|
152
|
+
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
|
|
153
|
+
}
|
|
154
|
+
export function __wbg___wbindgen_string_get_395e606bd0ee4427(arg0, arg1) {
|
|
155
|
+
const obj = arg1;
|
|
156
|
+
const ret = typeof(obj) === 'string' ? obj : undefined;
|
|
157
|
+
var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
158
|
+
var len1 = WASM_VECTOR_LEN;
|
|
159
|
+
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
|
|
160
|
+
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
|
|
161
|
+
}
|
|
162
|
+
export function __wbg___wbindgen_throw_6ddd609b62940d55(arg0, arg1) {
|
|
163
|
+
throw new Error(getStringFromWasm0(arg0, arg1));
|
|
164
|
+
}
|
|
165
|
+
export function __wbg_error_a6fa202b58aa1cd3(arg0, arg1) {
|
|
166
|
+
let deferred0_0;
|
|
167
|
+
let deferred0_1;
|
|
168
|
+
try {
|
|
169
|
+
deferred0_0 = arg0;
|
|
170
|
+
deferred0_1 = arg1;
|
|
171
|
+
console.error(getStringFromWasm0(arg0, arg1));
|
|
172
|
+
} finally {
|
|
173
|
+
wasm.__wbindgen_free(deferred0_0, deferred0_1, 1);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
export function __wbg_new_227d7c05414eb861() {
|
|
177
|
+
const ret = new Error();
|
|
178
|
+
return ret;
|
|
179
|
+
}
|
|
180
|
+
export function __wbg_new_a70fbab9066b301f() {
|
|
181
|
+
const ret = new Array();
|
|
182
|
+
return ret;
|
|
183
|
+
}
|
|
184
|
+
export function __wbg_new_ab79df5bd7c26067() {
|
|
185
|
+
const ret = new Object();
|
|
186
|
+
return ret;
|
|
187
|
+
}
|
|
188
|
+
export function __wbg_set_282384002438957f(arg0, arg1, arg2) {
|
|
189
|
+
arg0[arg1 >>> 0] = arg2;
|
|
190
|
+
}
|
|
191
|
+
export function __wbg_set_6be42768c690e380(arg0, arg1, arg2) {
|
|
192
|
+
arg0[arg1] = arg2;
|
|
193
|
+
}
|
|
194
|
+
export function __wbg_stack_3b0d974bbf31e44f(arg0, arg1) {
|
|
195
|
+
const ret = arg1.stack;
|
|
196
|
+
const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
197
|
+
const len1 = WASM_VECTOR_LEN;
|
|
198
|
+
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
|
|
199
|
+
getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
|
|
200
|
+
}
|
|
201
|
+
export function __wbindgen_cast_0000000000000001(arg0) {
|
|
202
|
+
// Cast intrinsic for `F64 -> Externref`.
|
|
203
|
+
const ret = arg0;
|
|
204
|
+
return ret;
|
|
205
|
+
}
|
|
206
|
+
export function __wbindgen_cast_0000000000000002(arg0, arg1) {
|
|
207
|
+
// Cast intrinsic for `Ref(String) -> Externref`.
|
|
208
|
+
const ret = getStringFromWasm0(arg0, arg1);
|
|
209
|
+
return ret;
|
|
210
|
+
}
|
|
211
|
+
export function __wbindgen_init_externref_table() {
|
|
212
|
+
const table = wasm.__wbindgen_externrefs;
|
|
213
|
+
const offset = table.grow(4);
|
|
214
|
+
table.set(0, undefined);
|
|
215
|
+
table.set(offset + 0, undefined);
|
|
216
|
+
table.set(offset + 1, null);
|
|
217
|
+
table.set(offset + 2, true);
|
|
218
|
+
table.set(offset + 3, false);
|
|
219
|
+
}
|
|
220
|
+
const WasmBM25Finalization = (typeof FinalizationRegistry === 'undefined')
|
|
221
|
+
? { register: () => {}, unregister: () => {} }
|
|
222
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_wasmbm25_free(ptr >>> 0, 1));
|
|
223
|
+
|
|
224
|
+
function addToExternrefTable0(obj) {
|
|
225
|
+
const idx = wasm.__externref_table_alloc();
|
|
226
|
+
wasm.__wbindgen_externrefs.set(idx, obj);
|
|
227
|
+
return idx;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
function getArrayU8FromWasm0(ptr, len) {
|
|
231
|
+
ptr = ptr >>> 0;
|
|
232
|
+
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
let cachedDataViewMemory0 = null;
|
|
236
|
+
function getDataViewMemory0() {
|
|
237
|
+
if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
|
|
238
|
+
cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
|
|
239
|
+
}
|
|
240
|
+
return cachedDataViewMemory0;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
function getStringFromWasm0(ptr, len) {
|
|
244
|
+
ptr = ptr >>> 0;
|
|
245
|
+
return decodeText(ptr, len);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
let cachedUint8ArrayMemory0 = null;
|
|
249
|
+
function getUint8ArrayMemory0() {
|
|
250
|
+
if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
|
|
251
|
+
cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
|
|
252
|
+
}
|
|
253
|
+
return cachedUint8ArrayMemory0;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
function isLikeNone(x) {
|
|
257
|
+
return x === undefined || x === null;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
function passArray8ToWasm0(arg, malloc) {
|
|
261
|
+
const ptr = malloc(arg.length * 1, 1) >>> 0;
|
|
262
|
+
getUint8ArrayMemory0().set(arg, ptr / 1);
|
|
263
|
+
WASM_VECTOR_LEN = arg.length;
|
|
264
|
+
return ptr;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
function passArrayJsValueToWasm0(array, malloc) {
|
|
268
|
+
const ptr = malloc(array.length * 4, 4) >>> 0;
|
|
269
|
+
for (let i = 0; i < array.length; i++) {
|
|
270
|
+
const add = addToExternrefTable0(array[i]);
|
|
271
|
+
getDataViewMemory0().setUint32(ptr + 4 * i, add, true);
|
|
272
|
+
}
|
|
273
|
+
WASM_VECTOR_LEN = array.length;
|
|
274
|
+
return ptr;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
function passStringToWasm0(arg, malloc, realloc) {
|
|
278
|
+
if (realloc === undefined) {
|
|
279
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
280
|
+
const ptr = malloc(buf.length, 1) >>> 0;
|
|
281
|
+
getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
|
|
282
|
+
WASM_VECTOR_LEN = buf.length;
|
|
283
|
+
return ptr;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
let len = arg.length;
|
|
287
|
+
let ptr = malloc(len, 1) >>> 0;
|
|
288
|
+
|
|
289
|
+
const mem = getUint8ArrayMemory0();
|
|
290
|
+
|
|
291
|
+
let offset = 0;
|
|
292
|
+
|
|
293
|
+
for (; offset < len; offset++) {
|
|
294
|
+
const code = arg.charCodeAt(offset);
|
|
295
|
+
if (code > 0x7F) break;
|
|
296
|
+
mem[ptr + offset] = code;
|
|
297
|
+
}
|
|
298
|
+
if (offset !== len) {
|
|
299
|
+
if (offset !== 0) {
|
|
300
|
+
arg = arg.slice(offset);
|
|
301
|
+
}
|
|
302
|
+
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
|
|
303
|
+
const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
|
|
304
|
+
const ret = cachedTextEncoder.encodeInto(arg, view);
|
|
305
|
+
|
|
306
|
+
offset += ret.written;
|
|
307
|
+
ptr = realloc(ptr, len, offset, 1) >>> 0;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
WASM_VECTOR_LEN = offset;
|
|
311
|
+
return ptr;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
function takeFromExternrefTable0(idx) {
|
|
315
|
+
const value = wasm.__wbindgen_externrefs.get(idx);
|
|
316
|
+
wasm.__externref_table_dealloc(idx);
|
|
317
|
+
return value;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
|
|
321
|
+
cachedTextDecoder.decode();
|
|
322
|
+
const MAX_SAFARI_DECODE_BYTES = 2146435072;
|
|
323
|
+
let numBytesDecoded = 0;
|
|
324
|
+
function decodeText(ptr, len) {
|
|
325
|
+
numBytesDecoded += len;
|
|
326
|
+
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
|
|
327
|
+
cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
|
|
328
|
+
cachedTextDecoder.decode();
|
|
329
|
+
numBytesDecoded = len;
|
|
330
|
+
}
|
|
331
|
+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
const cachedTextEncoder = new TextEncoder();
|
|
335
|
+
|
|
336
|
+
if (!('encodeInto' in cachedTextEncoder)) {
|
|
337
|
+
cachedTextEncoder.encodeInto = function (arg, view) {
|
|
338
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
339
|
+
view.set(buf);
|
|
340
|
+
return {
|
|
341
|
+
read: arg.length,
|
|
342
|
+
written: buf.length
|
|
343
|
+
};
|
|
344
|
+
};
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
let WASM_VECTOR_LEN = 0;
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
let wasm;
|
|
351
|
+
export function __wbg_set_wasm(val) {
|
|
352
|
+
wasm = val;
|
|
353
|
+
}
|
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "bm25-turbo-wasm",
|
|
3
|
+
"type": "module",
|
|
4
|
+
"description": "BM25 Turbo compiled to WebAssembly — the fastest BM25 search engine for the browser",
|
|
5
|
+
"version": "0.1.0",
|
|
6
|
+
"license": "MIT OR Apache-2.0",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/TheSauceSuite/BM25-Turbo-Rust-Python-WASM-CLI-"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"bm25_turbo_wasm_bg.wasm",
|
|
13
|
+
"bm25_turbo_wasm.js",
|
|
14
|
+
"bm25_turbo_wasm_bg.js",
|
|
15
|
+
"bm25_turbo_wasm.d.ts"
|
|
16
|
+
],
|
|
17
|
+
"main": "bm25_turbo_wasm.js",
|
|
18
|
+
"types": "bm25_turbo_wasm.d.ts",
|
|
19
|
+
"sideEffects": [
|
|
20
|
+
"./bm25_turbo_wasm.js",
|
|
21
|
+
"./snippets/*"
|
|
22
|
+
]
|
|
23
|
+
}
|