@tgies/megahal-js 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,27 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [1.0.0] - 2026-05-22
9
+
10
+ Initial release. A JavaScript port of the MegaHAL conversational engine
11
+ (Jason Hutchens, 1998), targeting both Node.js and browser environments.
12
+
13
+ ### Added
14
+ - Forward and backward 5th-order Markov trie models with case-insensitive symbols.
15
+ - Tokenization matching the original C boundary rules, including apostrophe
16
+ handling for contractions and sentence-terminal normalization.
17
+ - Two-pass keyword extraction with banned, auxiliary, and swap-table support.
18
+ - Reply generation with seeded forward and backward babble phases, keyword
19
+ priority, and the `used_key` discipline.
20
+ - Surprise-based reply scoring with depth-averaged probability and the
21
+ num >= 8 / num >= 16 length penalties.
22
+ - Binary brain persistence compatible with the `MegaHALv8` cookie format,
23
+ with optional 64-bit count/usage extensions.
24
+ - Default support file data (banned, auxiliary, greeting, swap) bundled.
25
+ - TypeScript declarations generated from JSDoc.
26
+
27
+ [1.0.0]: https://github.com/tgies/megahal-js/releases/tag/v1.0.0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tony Gies
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,156 @@
1
+ # MegaHAL-JS
2
+
3
+ [![CI](https://github.com/tgies/megahal-js/actions/workflows/ci.yml/badge.svg)](https://github.com/tgies/megahal-js/actions/workflows/ci.yml)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+
6
+ A JavaScript port of Jason Hutchens' famous 1998 MegaHAL conversational engine.
7
+
8
+ MegaHAL-JS runs natively in Node.js (>= 20) and in all modern browsers.
9
+
10
+ ---
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ npm install @tgies/megahal-js
16
+ ```
17
+
18
+ ---
19
+
20
+ ## Quick Start
21
+
22
+ ### Modern ESM (Node & Browser)
23
+
24
+ ```javascript
25
+ import { MegaHal } from '@tgies/megahal-js';
26
+
27
+ // Instantiate with order N (default is 5)
28
+ const hal = new MegaHal(5);
29
+
30
+ // Train the engine
31
+ hal.learn('The cat sat on the mat.');
32
+ hal.learn('The dog chased the cat around the yard.');
33
+
34
+ // Generate a reply (this will also learn from the prompt before generating)
35
+ const reply = hal.respond('Tell me about the cat.');
36
+ console.log(reply);
37
+ // e.g., "The dog chased the cat sat on the mat."
38
+ ```
39
+
40
+ ### Browser direct integration (Vanilla HTML/JS)
41
+
42
+ ```html
43
+ <!DOCTYPE html>
44
+ <html>
45
+ <head>
46
+ <title>MegaHAL Chatbot</title>
47
+ </head>
48
+ <body>
49
+ <script type="module">
50
+ import { MegaHal } from './node_modules/@tgies/megahal-js/index.js';
51
+
52
+ const hal = new MegaHal(3);
53
+ hal.learn('Hello world!');
54
+ hal.learn('Welcome to the web browser version of MegaHAL.');
55
+
56
+ console.log(hal.respond('hello'));
57
+ </script>
58
+ </body>
59
+ </html>
60
+ ```
61
+
62
+ ---
63
+
64
+ ## API Reference
65
+
66
+ ### `class MegaHal`
67
+
68
+ #### `constructor(order = 5, rng = null)`
69
+ Creates a new MegaHAL engine.
70
+ - `order`: The Markov n-gram depth (trie depth). Defaults to `5`.
71
+ - `rng`: An optional custom random number generator. Must implement `randomRange(min, max)` returning an integer in `[min, max)`. If omitted, defaults to `Math.random`.
72
+
73
+ #### `respond(input)`
74
+ Learns from the input sentence, extracts its keywords, generates a response biased toward those keywords, capitalizes the response according to sentence-casing rules, and returns it.
75
+ - `input`: The prompt string.
76
+ - Returns: `string`
77
+
78
+ #### `generate(input)`
79
+ Generates a response to the prompt *without* learning from it. Returns `null` if no reply can be generated.
80
+ - `input`: The prompt string.
81
+ - Returns: `string | null`
82
+
83
+ #### `greet()`
84
+ Generates an initial greeting using a random word selected from the greeting keywords list. Falls back to a default greeting if no greeting can be generated.
85
+ - Returns: `string`
86
+
87
+ #### `learn(input)`
88
+ Tokenizes and trains both forward and backward models on the given sentence.
89
+ - `input`: Sentence to train on.
90
+
91
+ #### `setLimit({ timeout, maxIterations })`
92
+ Configures generation limits for the reply loop.
93
+ - `timeout`: Maximum milliseconds to spend generating candidate responses (defaults to `1000`).
94
+ - `maxIterations`: Maximum candidates to generate.
95
+
96
+ #### `setKeywordConfig(config)`
97
+ Overrides the extraction config containing banned words, auxiliary words, and the swap table.
98
+ - `config`: A `KeywordConfig` instance.
99
+
100
+ #### `setGreetings(greetings)`
101
+ Sets the keywords list used to seed the initial greeting.
102
+ - `greetings`: Array of string greeting words.
103
+
104
+ #### `exportBrain()`
105
+ Serializes the engine's internal dictionary and tries into a spec-compliant C-compatible `.brn` binary format and returns a `Uint8Array`.
106
+ - Returns: `Uint8Array`
107
+
108
+ #### `importBrain(data)`
109
+ Deserializes a binary brain from a `Uint8Array` or `ArrayBuffer` into the engine, restoring dictionary and tries.
110
+ - `data`: Binary data buffer.
111
+
112
+ #### `trainFromContent(content)`
113
+ Trains the model on a multi-line text corpus. Lines starting with `#` are ignored as comments.
114
+ - `content`: Plain text string.
115
+
116
+ #### `saveBrain(path)` *(Node-only)*
117
+ Asynchronously saves the serialized binary brain to a file.
118
+ - `path`: Target file path.
119
+
120
+ #### `loadBrain(path)` *(Node-only)*
121
+ Asynchronously loads a serialized binary brain from a file.
122
+ - `path`: Source file path.
123
+
124
+ #### `trainFromFile(path)` *(Node-only)*
125
+ Asynchronously trains the model from a corpus file.
126
+ - `path`: Text file path.
127
+
128
+ ---
129
+
130
+ ## Quality & Verification Commands
131
+
132
+ ```bash
133
+ # Run unit tests
134
+ npm test
135
+
136
+ # Run test coverage
137
+ npm run test:coverage
138
+
139
+ # Perform TypeScript check
140
+ npm run typecheck
141
+
142
+ # Build declaration types
143
+ npm run build:types
144
+
145
+ # Run ESLint linter
146
+ npm run lint
147
+
148
+ # Run Stryker Mutation Testing
149
+ npm run test:mutation
150
+ ```
151
+
152
+ ---
153
+
154
+ ## License
155
+
156
+ MIT © [Tony Gies](mailto:tgies@tgies.net)
package/index.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ export { tokenize } from "./src/tokenizer.js";
2
+ export { SymbolDict } from "./src/dict.js";
3
+ export { Trie } from "./src/trie.js";
4
+ export { MegaHal, parseWordList, parseSwapFile, loadWordList, loadSwapFile } from "./src/engine.js";
5
+ export { extractKeywords, KeywordConfig, SwapTable } from "./src/keywords.js";
6
+ //# sourceMappingURL=index.d.ts.map
package/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.js"],"names":[],"mappings":""}
package/index.js ADDED
@@ -0,0 +1,29 @@
1
+ /**
2
+ * MegaHAL conversational engine entry point.
3
+ */
4
+
5
+ export {
6
+ MegaHal,
7
+ parseWordList,
8
+ parseSwapFile,
9
+ loadWordList,
10
+ loadSwapFile
11
+ } from './src/engine.js';
12
+
13
+ export {
14
+ tokenize
15
+ } from './src/tokenizer.js';
16
+
17
+ export {
18
+ extractKeywords,
19
+ KeywordConfig,
20
+ SwapTable
21
+ } from './src/keywords.js';
22
+
23
+ export {
24
+ SymbolDict
25
+ } from './src/dict.js';
26
+
27
+ export {
28
+ Trie
29
+ } from './src/trie.js';
package/package.json ADDED
@@ -0,0 +1,83 @@
1
+ {
2
+ "name": "@tgies/megahal-js",
3
+ "publishConfig": {
4
+ "access": "public"
5
+ },
6
+ "version": "1.0.0",
7
+ "description": "A JavaScript port of the MegaHAL conversational engine supporting both Node.js and Browser environments.",
8
+ "type": "module",
9
+ "main": "./index.js",
10
+ "exports": {
11
+ ".": {
12
+ "types": "./index.d.ts",
13
+ "import": "./index.js"
14
+ }
15
+ },
16
+ "types": "./index.d.ts",
17
+ "engines": {
18
+ "node": ">= 20"
19
+ },
20
+ "files": [
21
+ "index.js",
22
+ "index.d.ts",
23
+ "index.d.ts.map",
24
+ "src/**/*.js",
25
+ "src/**/*.d.ts",
26
+ "src/**/*.d.ts.map",
27
+ "README.md",
28
+ "LICENSE",
29
+ "CHANGELOG.md"
30
+ ],
31
+ "scripts": {
32
+ "test": "vitest run",
33
+ "test:coverage": "vitest run --coverage",
34
+ "test:mutation": "stryker run",
35
+ "typecheck": "tsc --noEmit",
36
+ "lint": "eslint .",
37
+ "lint:fix": "eslint . --fix",
38
+ "build:types": "node scripts/clean-types.js && tsc --declaration --emitDeclarationOnly --noEmit false",
39
+ "check": "npm run lint && npm run typecheck && npm run test",
40
+ "prepare": "husky",
41
+ "prepack": "npm run build:types"
42
+ },
43
+ "keywords": [
44
+ "megahal",
45
+ "chatbot",
46
+ "markov",
47
+ "n-gram",
48
+ "conversational-ai"
49
+ ],
50
+ "author": "Tony Gies <tgies@tgies.net>",
51
+ "license": "MIT",
52
+ "repository": {
53
+ "type": "git",
54
+ "url": "git+https://github.com/tgies/megahal-js.git"
55
+ },
56
+ "bugs": {
57
+ "url": "https://github.com/tgies/megahal-js/issues"
58
+ },
59
+ "homepage": "https://github.com/tgies/megahal-js#readme",
60
+ "devDependencies": {
61
+ "@arethetypeswrong/cli": "^0.18.2",
62
+ "@commitlint/cli": "^20.4.3",
63
+ "@commitlint/config-conventional": "^20.5.0",
64
+ "@eslint/js": "^10.0.1",
65
+ "@stryker-mutator/core": "^9.6.0",
66
+ "@stryker-mutator/vitest-runner": "^9.6.0",
67
+ "@types/node": "^25.6.0",
68
+ "@vitest/coverage-v8": "^3.0.0",
69
+ "eslint": "^10.0.3",
70
+ "globals": "^17.4.0",
71
+ "husky": "^9.1.7",
72
+ "lint-staged": "^16.3.2",
73
+ "publint": "^0.3.2",
74
+ "typescript": "^6.0.3",
75
+ "vitest": "^3.0.0"
76
+ },
77
+ "lint-staged": {
78
+ "*.js": "eslint --fix"
79
+ },
80
+ "overrides": {
81
+ "fflate": "0.8.2"
82
+ }
83
+ }
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Serialize a BidirectionalModel into a binary buffer.
3
+ *
4
+ * @param {import('./model.js').BidirectionalModel} model
5
+ * @param {{ use64Bit?: boolean }} [options] - Options for serialization
6
+ * @returns {Uint8Array}
7
+ */
8
+ export function serializeBrain(model: import("./model.js").BidirectionalModel, options?: {
9
+ use64Bit?: boolean;
10
+ }): Uint8Array;
11
+ /**
12
+ * Deserialize binary brain data into a BidirectionalModel.
13
+ *
14
+ * @param {Uint8Array|ArrayBuffer} data
15
+ * @param {import('./model.js').BidirectionalModel} model
16
+ */
17
+ export function deserializeBrain(data: Uint8Array | ArrayBuffer, model: import("./model.js").BidirectionalModel): void;
18
+ //# sourceMappingURL=binary.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"binary.d.ts","sourceRoot":"","sources":["binary.js"],"names":[],"mappings":"AA0NA;;;;;;GAMG;AACH,sCAJW,OAAO,YAAY,EAAE,kBAAkB,YACvC;IAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;CAAE,GACpB,UAAU,CA2CtB;AAED;;;;;GAKG;AACH,uCAHW,UAAU,GAAC,WAAW,SACtB,OAAO,YAAY,EAAE,kBAAkB,QAuDjD"}
package/src/binary.js ADDED
@@ -0,0 +1,328 @@
1
+ import { Trie, TrieNode } from './trie.js';
2
+ import { SymbolDict } from './dict.js';
3
+
4
+ const COOKIE = 'MegaHALv8';
5
+
6
/**
 * Growable byte sink that appends little-endian integers, raw bytes and
 * UTF-8 strings to an internal buffer.
 */
class BinaryWriter {
  constructor() {
    this.buffer = new Uint8Array(4096);
    this.offset = 0;
    this.view = new DataView(this.buffer.buffer);
  }

  /**
   * Ensure the internal buffer can hold `size` more bytes, doubling its
   * capacity until it fits and copying the existing contents across.
   * @private
   * @param {number} size
   */
  _ensure(size) {
    const needed = this.offset + size;
    if (needed <= this.buffer.byteLength) {
      return;
    }
    let newLength = this.buffer.byteLength * 2;
    while (needed > newLength) {
      newLength *= 2;
    }
    const grown = new Uint8Array(newLength);
    grown.set(this.buffer);
    this.buffer = grown;
    // The DataView is bound to the old ArrayBuffer, so it must be rebuilt.
    this.view = new DataView(grown.buffer);
  }

  /**
   * Write a uint8 byte.
   * @param {number} val
   */
  writeUint8(val) {
    this._ensure(1);
    this.view.setUint8(this.offset, val);
    this.offset += 1;
  }

  /**
   * Write a little-endian uint16 word.
   * @param {number} val
   */
  writeUint16(val) {
    this._ensure(2);
    this.view.setUint16(this.offset, val, true);
    this.offset += 2;
  }

  /**
   * Write a little-endian uint32 double word.
   * @param {number} val
   */
  writeUint32(val) {
    this._ensure(4);
    this.view.setUint32(this.offset, val, true);
    this.offset += 4;
  }

  /**
   * Write a raw byte array.
   * @param {Uint8Array} bytes
   */
  writeBytes(bytes) {
    this._ensure(bytes.length);
    this.buffer.set(bytes, this.offset);
    this.offset += bytes.length;
  }

  /**
   * Write a string as UTF-8 bytes (no length prefix, no terminator).
   * @param {string} str
   */
  writeString(str) {
    this.writeBytes(new TextEncoder().encode(str));
  }

  /**
   * Get the written contents as a Uint8Array.
   *
   * Returns an exact-size copy rather than a view over the growth buffer, so
   * `result.buffer` contains only the written bytes (no trailing capacity
   * padding) and the result is unaffected by later writes.
   * @returns {Uint8Array}
   */
  getUint8Array() {
    return this.buffer.slice(0, this.offset);
  }
}
89
+
90
/**
 * Sequential little-endian reader over a byte buffer.
 */
class BinaryReader {
  /**
   * @param {ArrayBuffer|Uint8Array} buffer
   */
  constructor(buffer) {
    this.buffer = buffer instanceof Uint8Array ? buffer : new Uint8Array(buffer);
    // Honour byteOffset/byteLength so a Uint8Array that is a window onto a
    // larger ArrayBuffer is read from its own start, not the backing store's.
    this.view = new DataView(this.buffer.buffer, this.buffer.byteOffset, this.buffer.byteLength);
    this.offset = 0;
  }

  /**
   * Throw a descriptive RangeError when fewer than `size` bytes remain.
   * Keeps truncated-input reporting for the numeric readers in line with
   * readBytes() instead of surfacing the engine's generic DataView message.
   * @private
   * @param {number} size
   * @param {string} what - Name of the field kind, used in the message
   */
  _requireNumeric(size, what) {
    if (this.offset + size > this.buffer.byteLength) {
      throw new RangeError(`Unexpected end of file while reading ${what}`);
    }
  }

  /**
   * Read a uint8 byte.
   * @returns {number}
   * @throws {RangeError} If no bytes remain.
   */
  readUint8() {
    this._requireNumeric(1, 'uint8');
    const val = this.view.getUint8(this.offset);
    this.offset += 1;
    return val;
  }

  /**
   * Read a little-endian uint16 word.
   * @returns {number}
   * @throws {RangeError} If fewer than 2 bytes remain.
   */
  readUint16() {
    this._requireNumeric(2, 'uint16');
    const val = this.view.getUint16(this.offset, true);
    this.offset += 2;
    return val;
  }

  /**
   * Read a little-endian uint32 double word.
   * @returns {number}
   * @throws {RangeError} If fewer than 4 bytes remain.
   */
  readUint32() {
    this._requireNumeric(4, 'uint32');
    const val = this.view.getUint32(this.offset, true);
    this.offset += 4;
    return val;
  }

  /**
   * Read raw bytes. The result is a view onto the reader's buffer, not a copy.
   * @param {number} length
   * @returns {Uint8Array}
   * @throws {Error} If fewer than `length` bytes remain.
   */
  readBytes(length) {
    if (this.offset + length > this.buffer.byteLength) {
      throw new Error('Unexpected end of file while reading bytes');
    }
    const bytes = this.buffer.subarray(this.offset, this.offset + length);
    this.offset += length;
    return bytes;
  }

  /**
   * Read `length` bytes and decode them as UTF-8.
   * @param {number} length
   * @returns {string}
   * @throws {Error} If fewer than `length` bytes remain.
   */
  readString(length) {
    return new TextDecoder().decode(this.readBytes(length));
  }

  /**
   * Check if there are more bytes to read.
   * @returns {boolean}
   */
  hasMore() {
    return this.offset < this.buffer.byteLength;
  }
}
163
+
164
/**
 * Serialize a Trie node and, recursively, all of its descendants in
 * pre-order.
 *
 * Record layout (little-endian): symbol (uint16), usage (uint32, or
 * low/high uint32 pair when `use64Bit`), count (uint16), child count
 * (uint16), followed by each child record.
 *
 * Values that do not fit their field are never allowed to wrap silently:
 * `count` and 32-bit `usage` saturate at the field maximum, 64-bit `usage`
 * has its real high word written, and an unrepresentable child count is
 * rejected because a wrapped value would make the stream unparseable.
 *
 * @param {Trie} trie
 * @param {number} ref
 * @param {BinaryWriter} writer
 * @param {boolean} use64Bit
 * @throws {RangeError} If a node has more than 65535 children.
 */
function serializeNode(trie, ref, writer, use64Bit) {
  const UINT16_MAX = 0xffff;
  const UINT32_MAX = 0xffffffff;
  const UINT32_RANGE = 0x100000000;

  const node = trie.node(ref);
  const branch = node.children.length;
  if (branch > UINT16_MAX) {
    throw new RangeError(
      `Trie node has ${branch} children, exceeding the maximum of ${UINT16_MAX} supported by the binary format`
    );
  }

  writer.writeUint16(node.symbol);
  if (use64Bit) {
    writer.writeUint32(node.usage % UINT32_RANGE); // Low 4 bytes
    writer.writeUint32(Math.floor(node.usage / UINT32_RANGE)); // High 4 bytes
  } else {
    writer.writeUint32(Math.min(node.usage, UINT32_MAX));
  }
  writer.writeUint16(Math.min(node.count, UINT16_MAX));
  writer.writeUint16(branch);

  for (const childRef of node.children) {
    serializeNode(trie, childRef, writer, use64Bit);
  }
}
187
+
188
/**
 * Deserialize a Trie node and, recursively, all of its descendants.
 *
 * Reads one record (symbol, usage, count, child count) and appends the
 * resulting node to `trie.nodes` before reading its children, so a node's
 * ref is always lower than the refs of its descendants.
 *
 * @param {Trie} trie
 * @param {BinaryReader} reader
 * @param {number} byte4Size - Width in bytes of the usage field: 8 means a
 *   low/high uint32 pair, anything else means a single uint32.
 * @returns {number} NodeRef (index of the node within `trie.nodes`)
 */
function deserializeNode(trie, reader, byte4Size) {
  const symbol = reader.readUint16();
  const usageLow = reader.readUint32();
  // Combine the high word instead of discarding it, so wide usage values
  // survive a round trip. Exact up to Number.MAX_SAFE_INTEGER (2^53 - 1).
  const usageHigh = byte4Size === 8 ? reader.readUint32() : 0;
  const count = reader.readUint16();
  const branch = reader.readUint16();

  const node = new TrieNode(symbol);
  node.usage = usageHigh * 0x100000000 + usageLow;
  node.count = count;

  const ref = trie.nodes.length;
  trie.nodes.push(node);

  for (let i = 0; i < branch; i++) {
    node.children.push(deserializeNode(trie, reader, byte4Size));
  }

  return ref;
}
218
+
219
/**
 * Serialize a BidirectionalModel into a binary buffer.
 *
 * Layout: 9-byte magic cookie, 1-byte model order, the forward trie, the
 * backward trie, the dictionary size (uint32, followed by four zero bytes
 * when `use64Bit`), then each dictionary word as a 1-byte length followed by
 * its UTF-8 bytes.
 *
 * @param {import('./model.js').BidirectionalModel} model
 * @param {{ use64Bit?: boolean }} [options] - Options for serialization
 * @returns {Uint8Array}
 * @throws {RangeError} If the model order does not fit in one byte, or the
 *   dictionary holds more than 65536 symbols.
 * @throws {Error} If a dictionary word encodes to more than 255 bytes.
 */
export function serializeBrain(model, options = {}) {
  const use64Bit = !!options.use64Bit;
  const writer = new BinaryWriter();
  // One encoder is enough; it is stateless across encode() calls.
  const encoder = new TextEncoder();

  const cookieBytes = encoder.encode(COOKIE);
  if (cookieBytes.length !== 9) {
    throw new Error('Cookie size must be exactly 9 bytes');
  }

  // The order is stored in a single byte; a larger value would silently wrap
  // and produce a file that reloads with the wrong order.
  if (model.order > 0xff) {
    throw new RangeError(
      `Model order (${model.order}) exceeds maximum of 255 supported by the binary format`
    );
  }

  writer.writeBytes(cookieBytes);
  writer.writeUint8(model.order);

  serializeNode(model.forward, model.forward.root(), writer, use64Bit);
  serializeNode(model.backward, model.backward.root(), writer, use64Bit);

  const dict = model.dictionary;
  if (dict.entries.length > 65536) {
    throw new RangeError(
      `Dictionary size (${dict.entries.length}) exceeds maximum of 65536 symbols supported by the binary format`
    );
  }

  writer.writeUint32(dict.entries.length);
  if (use64Bit) {
    writer.writeUint32(0); // High 4 bytes
  }

  for (const word of dict.entries) {
    const wordBytes = encoder.encode(word);
    if (wordBytes.length > 255) {
      throw new Error(`Symbol '${word}' exceeds maximum byte size of 255`);
    }
    writer.writeUint8(wordBytes.length);
    writer.writeBytes(wordBytes);
  }

  return writer.getUint8Array();
}
268
+
269
/**
 * Deserialize binary brain data into a BidirectionalModel.
 *
 * The whole file is parsed into local structures first and only assigned to
 * `model` once parsing has succeeded, so a truncated or corrupt file throws
 * without leaving the model partially overwritten.
 *
 * @param {Uint8Array|ArrayBuffer} data
 * @param {import('./model.js').BidirectionalModel} model - Receives `order`,
 *   `forward`, `backward` and `dictionary` on success.
 * @throws {Error} If the magic cookie does not match or the data ends early.
 */
export function deserializeBrain(data, model) {
  const reader = new BinaryReader(data);

  const cookie = reader.readString(9);
  if (cookie !== COOKIE) {
    throw new Error('Invalid brain file: Magic cookie mismatch');
  }

  const order = reader.readUint8();

  // Auto-detect byte4Size (4 or 8 bytes) by inspecting the root node of the
  // forward tree. The root starts at offset 10 (after cookie + order); its
  // child-count field sits at offset 18 with a 4-byte usage field and at
  // offset 22 with an 8-byte one. In an 8-byte file offset 18 falls inside
  // the high half of usage, which is expected to be zero.
  let byte4Size = 4;
  if (reader.buffer.byteLength >= 24) {
    const branch4 = reader.view.getUint16(18, true);
    const branch8 = reader.view.getUint16(22, true);
    if (branch4 === 0 && branch8 > 0) {
      byte4Size = 8;
    }
  }

  const forward = new Trie();
  forward.nodes = []; // Clear default root
  deserializeNode(forward, reader, byte4Size);

  const backward = new Trie();
  backward.nodes = []; // Clear default root
  deserializeNode(backward, reader, byte4Size);

  const dictSize = reader.readUint32();
  if (byte4Size === 8) {
    reader.readUint32(); // Skip high 4 bytes of dictionary size
  }

  const dict = new SymbolDict();
  dict.entries = [];
  dict.sortedIndex = [];

  for (let i = 0; i < dictSize; i++) {
    const len = reader.readUint8();
    const word = reader.readString(len);
    dict.entries.push(word);

    // Reconstruct sorted index using binary search insert position.
    const { index } = dict._binarySearch(word);
    dict.sortedIndex.splice(index, 0, i);
  }

  // Commit only after the entire file has been read successfully.
  model.order = order;
  model.forward = forward;
  model.backward = backward;
  model.dictionary = dict;
}