@tgies/megahal-js 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/LICENSE +21 -0
- package/README.md +156 -0
- package/index.d.ts +6 -0
- package/index.d.ts.map +1 -0
- package/index.js +29 -0
- package/package.json +83 -0
- package/src/binary.d.ts +18 -0
- package/src/binary.d.ts.map +1 -0
- package/src/binary.js +328 -0
- package/src/dict.d.ts +54 -0
- package/src/dict.d.ts.map +1 -0
- package/src/dict.js +115 -0
- package/src/engine.d.ts +140 -0
- package/src/engine.d.ts.map +1 -0
- package/src/engine.js +317 -0
- package/src/evaluator.d.ts +10 -0
- package/src/evaluator.d.ts.map +1 -0
- package/src/evaluator.js +101 -0
- package/src/generator.d.ts +36 -0
- package/src/generator.d.ts.map +1 -0
- package/src/generator.js +296 -0
- package/src/keywords.d.ts +34 -0
- package/src/keywords.d.ts.map +1 -0
- package/src/keywords.js +122 -0
- package/src/model.d.ts +73 -0
- package/src/model.d.ts.map +1 -0
- package/src/model.js +154 -0
- package/src/tokenizer.d.ts +8 -0
- package/src/tokenizer.d.ts.map +1 -0
- package/src/tokenizer.js +125 -0
- package/src/trie.d.ts +81 -0
- package/src/trie.d.ts.map +1 -0
- package/src/trie.js +164 -0
package/src/model.js
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { Trie } from './trie.js';
|
|
2
|
+
import { SymbolDict, FIN_ID } from './dict.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Sliding window of trie node references, one slot per context depth,
 * used to follow a stream of symbols through an n-gram trie.
 */
export class ContextWindow {
  /**
   * @param {number} order - Markov model order
   */
  constructor(order) {
    this.order = order;
    const slotCount = order + 2;
    /** @type {(number|null)[]} Node reference per depth (indices 0..order + 1); null when no node is tracked. */
    this.slots = new Array(slotCount).fill(null);
  }

  /**
   * Clear every slot, then place `rootRef` at depth 0.
   * @param {number} rootRef
   */
  initialize(rootRef) {
    const slots = this.slots;
    slots.fill(null);
    slots[0] = rootRef;
  }

  /**
   * Shift the window by one symbol using lookups only (no trie nodes are created).
   * A depth whose parent slot is empty, or whose parent has no matching child,
   * becomes null.
   * @param {Trie} trie
   * @param {number} symbolId
   */
  advance(trie, symbolId) {
    const slots = this.slots;
    // Walk from the deepest slot down so each slot is derived from the
    // previous step's value of the slot above it.
    let depth = this.order + 1;
    while (depth >= 1) {
      const above = slots[depth - 1];
      let next = null;
      if (above != null) {
        const hit = trie.findChild(above, symbolId);
        if (hit !== undefined) {
          next = hit;
        }
      }
      slots[depth] = next;
      depth -= 1;
    }
  }

  /**
   * Shift the window by one symbol, calling `trie.addChild` for every depth
   * whose parent slot is populated. Depths with an empty parent become null.
   * @param {Trie} trie
   * @param {number} symbolId
   */
  advanceAndLearn(trie, symbolId) {
    const slots = this.slots;
    // Deepest slot first, for the same reason as in advance().
    let depth = this.order + 1;
    while (depth >= 1) {
      const above = slots[depth - 1];
      slots[depth] = above == null ? null : trie.addChild(above, symbolId);
      depth -= 1;
    }
  }

  /**
   * Read the slot at depth `j`.
   * @param {number} j
   * @returns {number|null} null when `j` is negative or past the last slot
   */
  atDepth(j) {
    const outOfRange = j < 0 || j >= this.slots.length;
    return outOfRange ? null : this.slots[j];
  }

  /**
   * Find the deepest populated slot among depths 0 through `order` (inclusive).
   * The extra slot at `order + 1` is not considered.
   * @returns {number|null} null when none of those slots is populated
   */
  deepest() {
    const limit = this.order;
    let found = null;
    for (let depth = 0; depth <= limit; depth += 1) {
      const ref = this.slots[depth];
      if (ref != null) {
        found = ref;
      }
    }
    return found;
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Pair of Markov tries (one learned left-to-right, one right-to-left)
 * that share a single symbol dictionary.
 */
export class BidirectionalModel {
  /**
   * @param {number} order - Markov model order (default: 5)
   */
  constructor(order = 5) {
    this.order = order;
    this.forward = new Trie();
    this.backward = new Trie();
    this.dictionary = new SymbolDict();
  }

  /**
   * Train both tries on one token sequence.
   * Sequences with `tokens.length <= order` are ignored entirely.
   * @param {string[]} tokens
   */
  learn(tokens) {
    const tooShort = tokens.length <= this.order;
    if (tooShort) {
      return;
    }

    // Forward pass: intern each token and feed it to the forward trie,
    // remembering the ids so the backward pass can replay them.
    const forwardWindow = this.forwardContext();
    /** @type {number[]} */
    const symbolIds = [];
    for (const token of tokens) {
      const symbolId = this.dictionary.intern(token);
      symbolIds.push(symbolId);
      forwardWindow.advanceAndLearn(this.forward, symbolId);
    }
    // Terminate the forward sequence with the FIN symbol.
    forwardWindow.advanceAndLearn(this.forward, FIN_ID);

    // Backward pass: replay the same ids last-to-first into the backward trie.
    const backwardWindow = this.backwardContext();
    for (const symbolId of symbolIds.slice().reverse()) {
      backwardWindow.advanceAndLearn(this.backward, symbolId);
    }
    // Terminate the backward sequence with the FIN symbol as well.
    backwardWindow.advanceAndLearn(this.backward, FIN_ID);
  }

  /**
   * Build a fresh context window positioned at the forward trie's root.
   * @returns {ContextWindow}
   */
  forwardContext() {
    const window = new ContextWindow(this.order);
    window.initialize(this.forward.root());
    return window;
  }

  /**
   * Build a fresh context window positioned at the backward trie's root.
   * @returns {ContextWindow}
   */
  backwardContext() {
    const window = new ContextWindow(this.order);
    window.initialize(this.backward.root());
    return window;
  }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["tokenizer.js"],"names":[],"mappings":"AAmFA;;;;;GAKG;AACH,gCAHW,MAAM,GACJ,MAAM,EAAE,CAqCpB"}
|
package/src/tokenizer.js
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MegaHAL Tokenizer.
|
|
3
|
+
* Splits input text into an alternating sequence of word tokens and separator tokens.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Test whether `char` is exactly one uppercase ASCII letter (A-Z).
 * Lowercase letters, multi-character strings and non-strings yield false.
 * @param {string} char
 * @returns {boolean}
 */
function isAlpha(char) {
  if (typeof char !== 'string' || char.length !== 1) {
    return false;
  }
  return char >= 'A' && char <= 'Z';
}
|
|
14
|
+
|
|
15
|
+
/**
 * Test whether `char` is exactly one ASCII decimal digit (0-9).
 * Multi-character strings and non-strings yield false.
 * @param {string} char
 * @returns {boolean}
 */
function isDigit(char) {
  if (typeof char !== 'string' || char.length !== 1) {
    return false;
  }
  return char >= '0' && char <= '9';
}
|
|
23
|
+
|
|
24
|
+
/**
 * Test whether `char` is exactly one uppercase ASCII letter (A-Z) or
 * ASCII decimal digit (0-9). Lowercase letters, multi-character strings
 * and non-strings yield false.
 * @param {string} char
 * @returns {boolean}
 */
function isAlphanumeric(char) {
  if (typeof char !== 'string' || char.length !== 1) {
    return false;
  }
  const upperLetter = char >= 'A' && char <= 'Z';
  const decimalDigit = char >= '0' && char <= '9';
  return upperLetter || decimalDigit;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Decide whether a token boundary falls immediately before index `pos`
 * of the uppercase string `input`.
 *
 * Rules (from MEGAHAL_SPEC.md Section 4.1), checked in this order:
 *   1. pos == 0 is never a boundary.
 *   2. pos == input.length is always a boundary.
 *   3. Apostrophe rule: an apostrophe at pos with a letter on each side does
 *      not start a boundary, and neither does a letter at pos that follows
 *      a letter-apostrophe pair.
 *   4. Alpha transition: a boundary when exactly one of input[pos] and
 *      input[pos - 1] is a letter.
 *   5. Digit transition: a boundary when exactly one of them is a digit.
 *
 * @param {string} input - Uppercase string
 * @param {number} pos - 0-indexed position to test
 * @returns {boolean}
 */
function isBoundary(input, pos) {
  if (pos === 0) {
    return false;
  }
  if (pos === input.length) {
    return true;
  }

  const here = input[pos];
  const before = input[pos - 1];
  const hereIsAlpha = isAlpha(here);
  const beforeIsAlpha = isAlpha(before);

  // Apostrophe sitting between two letters: keep it inside the word.
  const apostropheInsideWord =
    here === '\'' && pos + 1 < input.length && beforeIsAlpha && isAlpha(input[pos + 1]);
  if (apostropheInsideWord) {
    return false;
  }

  // Letter right after a letter-apostrophe pair: still the same word.
  const letterAfterApostrophe =
    before === '\'' && pos >= 2 && isAlpha(input[pos - 2]) && hereIsAlpha;
  if (letterAfterApostrophe) {
    return false;
  }

  // Alpha transition.
  if (hereIsAlpha !== beforeIsAlpha) {
    return true;
  }

  // Digit transition decides everything that is left.
  return isDigit(here) !== isDigit(before);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Split text into uppercase tokens per the MegaHAL rules and make sure the
 * result ends with a sentence terminator.
 *
 * Empty, missing, or whitespace-only input yields `['.']`.
 *
 * @param {string} input
 * @returns {string[]}
 */
export function tokenize(input) {
  if (!input || input.trim() === '') {
    return ['.'];
  }

  const upper = input.toUpperCase();
  /** @type {string[]} */
  const tokens = [];
  let tokenStart = 0;

  // Cut a token at every boundary; the end of the string always counts as one.
  for (let cut = 1; cut <= upper.length; cut += 1) {
    if (!isBoundary(upper, cut)) {
      continue;
    }
    if (cut > tokenStart) {
      tokens.push(upper.substring(tokenStart, cut));
    }
    tokenStart = cut;
  }

  // Defensive: the end-of-string boundary above always yields a token for
  // non-empty input, so this is not expected to trigger.
  if (tokens.length === 0) {
    return ['.'];
  }

  // Sentence-terminal normalization.
  const finalIndex = tokens.length - 1;
  const finalToken = tokens[finalIndex];
  const leading = finalToken[0];
  const trailing = finalToken[finalToken.length - 1];
  const endsSentence = trailing === '!' || trailing === '.' || trailing === '?';

  if (isAlphanumeric(leading)) {
    // Last token is a word: append a terminator after it.
    tokens.push('.');
  } else if (!endsSentence) {
    // Last token is a separator that does not end in !, . or ?: replace it.
    tokens[finalIndex] = '.';
  }

  return tokens;
}
|
package/src/trie.d.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
 * A single node stored in the frequency trie's arena.
 */
export class TrieNode {
    /**
     * @param {number} symbolId
     */
    constructor(symbolId: number);
    /** @type {number} ID of the symbol this node stands for. */
    symbol: number;
    /** @type {number} Sum of observations recorded across this node's children. */
    usage: number;
    /** @type {number} How many times this symbol was observed under its parent. */
    count: number;
    /** @type {number[]} Arena references of this node's child nodes. */
    children: number[];
}
|
|
18
|
+
/**
 * Frequency trie whose nodes live in a single arena array and refer to
 * each other by index.
 */
export class Trie {
    /** @type {TrieNode[]} Arena holding every node; the root sits at index 0. */
    nodes: TrieNode[];
    /**
     * Reference of the root node (always index 0).
     * @returns {number}
     */
    root(): number;
    /**
     * Binary-search a parent's children list for a symbol.
     * @private
     * @param {number} parentRef - Index of the parent node in nodes.
     * @param {number} symbolId - Symbol ID being looked up.
     * @returns {{ found: boolean, index: number }}
     */
    private _findChildIndex;
    /**
     * Look up the existing child of a parent that carries symbolId.
     * Yields undefined when the parent has no such child.
     * @param {number} parentRef
     * @param {number} symbolId
     * @returns {number|undefined}
     */
    findChild(parentRef: number, symbolId: number): number | undefined;
    /**
     * Look up the child of a parent that carries symbolId, creating it when
     * absent, and increment counts.
     * @param {number} parentRef
     * @param {number} symbolId
     * @returns {number} NodeRef (index in nodes arena)
     */
    addChild(parentRef: number, symbolId: number): number;
    /**
     * List the child node references of a node.
     * @param {number} parentRef
     * @returns {number[]}
     */
    children(parentRef: number): number[];
    /**
     * Count the children of a node.
     * @param {number} parentRef
     * @returns {number}
     */
    branchCount(parentRef: number): number;
    /**
     * Fetch a node by its reference index.
     * @param {number} ref
     * @returns {TrieNode}
     */
    node(ref: number): TrieNode;
    /**
     * Number of nodes held by the trie, root included.
     * @returns {number}
     */
    get size(): number;
    /**
     * True when the root is the only node in the trie.
     * @returns {boolean}
     */
    isEmpty(): boolean;
}
|
|
81
|
+
//# sourceMappingURL=trie.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trie.d.ts","sourceRoot":"","sources":["trie.js"],"names":[],"mappings":"AAIA;;GAEG;AACH;IACE;;OAEG;IACH,sBAFW,MAAM,EAchB;IAXC,gCAAgC;IAChC,QADW,MAAM,CACK;IAEtB,4DAA4D;IAC5D,OADW,MAAM,CACH;IAEd,+EAA+E;IAC/E,OADW,MAAM,CACH;IAEd,+DAA+D;IAC/D,UADW,MAAM,EAAE,CACD;CAErB;AAED;;GAEG;AACH;IAEI,yEAAyE;IACzE,OADW,QAAQ,EAAE,CACgB;IAGvC;;;OAGG;IACH,QAFa,MAAM,CAIlB;IAED;;;;;;OAMG;IACH,wBAsBC;IAED;;;;;;OAMG;IACH,qBAJW,MAAM,YACN,MAAM,GACJ,MAAM,GAAC,SAAS,CAQ5B;IAED;;;;;OAKG;IACH,oBAJW,MAAM,YACN,MAAM,GACJ,MAAM,CAuBlB;IAED;;;;OAIG;IACH,oBAHW,MAAM,GACJ,MAAM,EAAE,CAIpB;IAED;;;;OAIG;IACH,uBAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;OAIG;IACH,UAHW,MAAM,GACJ,QAAQ,CAOpB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,OAAO,CAInB;CACF"}
|
package/src/trie.js
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { ERROR_ID } from './dict.js';
|
|
2
|
+
|
|
3
|
+
// Largest unsigned 16-bit value; addChild stops incrementing a child's count once it reaches this.
const U16_MAX = 65535;
|
|
4
|
+
|
|
5
|
+
/**
 * A single node stored in the frequency trie's arena.
 * New nodes start with zero counts and no children.
 */
export class TrieNode {
  /**
   * @param {number} symbolId
   */
  constructor(symbolId) {
    /** @type {number} ID of the symbol this node stands for. */
    this.symbol = symbolId;

    /** @type {number} Sum of observations recorded across this node's children. */
    this.usage = 0;

    /** @type {number} How many times this symbol was observed under its parent. */
    this.count = 0;

    /** @type {number[]} Arena references of this node's child nodes. */
    this.children = [];
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Frequency trie whose nodes live in a single arena array and refer to
 * each other by index.
 */
export class Trie {
  constructor() {
    /** @type {TrieNode[]} Arena holding every node; the root sits at index 0. */
    this.nodes = [new TrieNode(ERROR_ID)];
  }

  /**
   * Reference of the root node (always index 0).
   * @returns {number}
   */
  root() {
    return 0;
  }

  /**
   * Binary-search a parent's children list (kept ordered by symbol ID) for a symbol.
   * @private
   * @param {number} parentRef - Index of the parent node in nodes.
   * @param {number} symbolId - Symbol ID being looked up.
   * @returns {{ found: boolean, index: number }} On a hit, `index` is the
   *   position of the match in the children list; on a miss it is the
   *   position where the symbol would be inserted.
   */
  _findChildIndex(parentRef, symbolId) {
    const siblings = this.nodes[parentRef].children;

    let lo = 0;
    let hi = siblings.length - 1;

    while (lo <= hi) {
      const probe = (lo + hi) >> 1;
      const probeSymbol = this.nodes[siblings[probe]].symbol;

      if (probeSymbol < symbolId) {
        lo = probe + 1;
        continue;
      }
      if (probeSymbol > symbolId) {
        hi = probe - 1;
        continue;
      }
      return { found: true, index: probe };
    }

    return { found: false, index: lo };
  }

  /**
   * Look up the existing child of a parent that carries symbolId.
   * Yields undefined when the parent has no such child.
   * @param {number} parentRef
   * @param {number} symbolId
   * @returns {number|undefined}
   */
  findChild(parentRef, symbolId) {
    const lookup = this._findChildIndex(parentRef, symbolId);
    return lookup.found ? this.nodes[parentRef].children[lookup.index] : undefined;
  }

  /**
   * Record one observation of symbolId under a parent, creating the child
   * node when it does not exist yet.
   * @param {number} parentRef
   * @param {number} symbolId
   * @returns {number} NodeRef (index in nodes arena)
   */
  addChild(parentRef, symbolId) {
    const { found, index } = this._findChildIndex(parentRef, symbolId);
    const parent = this.nodes[parentRef];

    if (!found) {
      // Append the new node to the arena and splice its reference into the
      // parent's list at the insertion point so the list stays ordered.
      const freshRef = this.nodes.length;
      const fresh = new TrieNode(symbolId);
      fresh.count = 1;
      this.nodes.push(fresh);

      parent.usage++;
      parent.children.splice(index, 0, freshRef);
      return freshRef;
    }

    const existingRef = parent.children[index];
    const existing = this.nodes[existingRef];
    // Once the child's count hits the cap, neither it nor the parent's
    // usage is incremented any further.
    if (existing.count < U16_MAX) {
      existing.count++;
      parent.usage++;
    }
    return existingRef;
  }

  /**
   * List the child node references of a node.
   * @param {number} parentRef
   * @returns {number[]}
   */
  children(parentRef) {
    const parent = this.nodes[parentRef];
    return parent.children;
  }

  /**
   * Count the children of a node.
   * @param {number} parentRef
   * @returns {number}
   */
  branchCount(parentRef) {
    const parent = this.nodes[parentRef];
    return parent.children.length;
  }

  /**
   * Fetch a node by its reference index.
   * @param {number} ref
   * @returns {TrieNode}
   * @throws {RangeError} When `ref` is negative or not below the node count.
   */
  node(ref) {
    const outOfBounds = ref < 0 || ref >= this.nodes.length;
    if (outOfBounds) {
      throw new RangeError(`Node reference ${ref} is out of bounds`);
    }
    return this.nodes[ref];
  }

  /**
   * Number of nodes held by the trie, root included.
   * @returns {number}
   */
  get size() {
    return this.nodes.length;
  }

  /**
   * True when the root is the only node in the trie.
   * @returns {boolean}
   */
  isEmpty() {
    const nodeCount = this.nodes.length;
    return nodeCount <= 1;
  }
}
|