@tgies/megahal-js 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/LICENSE +21 -0
- package/README.md +156 -0
- package/index.d.ts +6 -0
- package/index.d.ts.map +1 -0
- package/index.js +29 -0
- package/package.json +83 -0
- package/src/binary.d.ts +18 -0
- package/src/binary.d.ts.map +1 -0
- package/src/binary.js +328 -0
- package/src/dict.d.ts +54 -0
- package/src/dict.d.ts.map +1 -0
- package/src/dict.js +115 -0
- package/src/engine.d.ts +140 -0
- package/src/engine.d.ts.map +1 -0
- package/src/engine.js +317 -0
- package/src/evaluator.d.ts +10 -0
- package/src/evaluator.d.ts.map +1 -0
- package/src/evaluator.js +101 -0
- package/src/generator.d.ts +36 -0
- package/src/generator.d.ts.map +1 -0
- package/src/generator.js +296 -0
- package/src/keywords.d.ts +34 -0
- package/src/keywords.d.ts.map +1 -0
- package/src/keywords.js +122 -0
- package/src/model.d.ts +73 -0
- package/src/model.d.ts.map +1 -0
- package/src/model.js +154 -0
- package/src/tokenizer.d.ts +8 -0
- package/src/tokenizer.d.ts.map +1 -0
- package/src/tokenizer.js +125 -0
- package/src/trie.d.ts +81 -0
- package/src/trie.d.ts.map +1 -0
- package/src/trie.js +164 -0
package/src/dict.d.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * Reserved symbol IDs: 0 is the '<ERROR>' sentinel and 1 is the '<FIN>' sentinel.
 */
export const ERROR_ID: 0;
export const FIN_ID: 1;
|
|
6
|
+
/**
 * Interning dictionary that gives every distinct symbol string a compact integer ID.
 */
export class SymbolDict {
    /** @type {string[]} Symbols in insertion order; a symbol's index is its ID. */
    entries: string[];
    /** @type {number[]} IDs (indices into entries) kept ordered by string comparison of their symbols. */
    sortedIndex: number[];
    /**
     * Binary-search the sorted index for a symbol.
     * @param {string} symbol - The symbol to look for (assumed to be uppercase already).
     * @returns {{ found: boolean, index: number }} Position in sortedIndex of the match, or the insertion point when absent.
     */
    _binarySearch(symbol: string): {
        found: boolean;
        index: number;
    };
    /**
     * Return the ID for a symbol, adding it to the dictionary first if needed.
     * The symbol is upper-cased before lookup, so interning is case-insensitive.
     * @param {string} symbol
     * @returns {number}
     */
    intern(symbol: string): number;
    /**
     * Look up the ID of a symbol without adding it (case-insensitive).
     * Yields undefined when the symbol has never been interned.
     * @param {string} symbol
     * @returns {number|undefined}
     */
    find(symbol: string): number | undefined;
    /**
     * Map a symbol ID back to the stored (upper-cased) string; throws RangeError for an out-of-range ID.
     * @param {number} id
     * @returns {string}
     */
    resolve(id: number): string;
    /**
     * Total number of entries, counting the two built-in sentinels.
     * @returns {number}
     */
    get size(): number;
    /**
     * True while nothing beyond the two built-in sentinels has been interned.
     * @returns {boolean}
     */
    isEmpty(): boolean;
}
|
|
54
|
+
//# sourceMappingURL=dict.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dict.d.ts","sourceRoot":"","sources":["dict.js"],"names":[],"mappings":"AAAA;;GAEG;AACH,uBAAwB,CAAC,CAAC;AAC1B,qBAAsB,CAAC,CAAC;AAExB;;GAEG;AACH;IAEI,4EAA4E;IAC5E,SADW,MAAM,EAAE,CACgB;IAEnC,4EAA4E;IAC5E,aADW,MAAM,EAAE,CACE;IAWvB;;;;OAIG;IACH,sBAHW,MAAM,GACJ;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAqB7C;IAED;;;;;OAKG;IACH,eAHW,MAAM,GACJ,MAAM,CAclB;IAED;;;;;OAKG;IACH,aAHW,MAAM,GACJ,MAAM,GAAC,SAAS,CAS5B;IAED;;;;OAIG;IACH,YAHW,MAAM,GACJ,MAAM,CAOlB;IAED;;;OAGG;IACH,YAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,WAFa,OAAO,CAInB;CACF"}
|
package/src/dict.js
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
 * Symbol ID constants.
 *
 * These are the indices of the two sentinel entries every SymbolDict is
 * seeded with: '<ERROR>' at 0 and '<FIN>' at 1.
 */
export const ERROR_ID = 0;
export const FIN_ID = 1;

/**
 * Interning Dictionary mapping symbols (strings) to compact integer IDs.
 *
 * Symbols are upper-cased before they are stored or looked up, so the mapping
 * is case-insensitive. IDs are assigned in insertion order and never change;
 * a separate sorted index allows lookups by binary search.
 */
export class SymbolDict {
  constructor() {
    /** @type {string[]} Symbols in insertion order (index is the Symbol ID). */
    this.entries = ['<ERROR>', '<FIN>'];

    const { entries } = this;

    /**
     * @type {number[]} IDs (indices into entries), ordered by string
     * comparison of the symbols they refer to.
     */
    this.sortedIndex = entries
      .map((_, id) => id)
      // Same `<` / `>` ordering that _binarySearch relies on.
      .sort((a, b) => (entries[a] < entries[b] ? -1 : entries[a] > entries[b] ? 1 : 0));
  }

  /**
   * Helper to perform binary search on the sorted index.
   * @param {string} symbol - The symbol to search for (assumed to be uppercase).
   * @returns {{ found: boolean, index: number }} When found, `index` is the
   *   position of the match inside sortedIndex; otherwise it is the position
   *   at which the symbol would have to be inserted to keep the index sorted.
   */
  _binarySearch(symbol) {
    let low = 0;
    let high = this.sortedIndex.length - 1;

    while (low <= high) {
      const mid = low + ((high - low) >> 1);
      const midSym = this.entries[this.sortedIndex[mid]];

      if (midSym < symbol) {
        low = mid + 1;
      } else if (midSym > symbol) {
        high = mid - 1;
      } else {
        return { found: true, index: mid };
      }
    }

    return { found: false, index: low };
  }

  /**
   * Intern a symbol, returning its unique ID.
   * The symbol is upper-cased first. If it already exists, the existing ID is
   * returned; otherwise it is appended and receives the next free ID.
   * @param {string} symbol
   * @returns {number}
   */
  intern(symbol) {
    const sym = symbol.toUpperCase();
    const { found, index } = this._binarySearch(sym);

    if (found) {
      return this.sortedIndex[index];
    }

    const newId = this.entries.length;
    this.entries.push(sym);
    // `index` is the insertion point, so the sorted order is preserved.
    this.sortedIndex.splice(index, 0, newId);
    return newId;
  }

  /**
   * Find the ID of an existing symbol without inserting it.
   * The symbol is upper-cased before the lookup.
   * @param {string} symbol
   * @returns {number|undefined} The ID, or undefined if the symbol does not exist.
   */
  find(symbol) {
    const { found, index } = this._binarySearch(symbol.toUpperCase());
    return found ? this.sortedIndex[index] : undefined;
  }

  /**
   * Resolve a symbol ID back to its string value.
   * @param {number} id
   * @returns {string}
   * @throws {RangeError} If `id` is not an integer in `[0, size)`.
   */
  resolve(id) {
    // A fractional or NaN id would index past the range check and yield
    // undefined, so it is rejected the same way as an out-of-range id.
    if (!Number.isInteger(id) || id < 0 || id >= this.entries.length) {
      throw new RangeError(`Symbol ID ${id} is out of bounds`);
    }
    return this.entries[id];
  }

  /**
   * Number of symbols in the dictionary (including sentinels).
   * @returns {number}
   */
  get size() {
    return this.entries.length;
  }

  /**
   * Whether the dictionary contains only the default sentinels.
   * @returns {boolean}
   */
  isEmpty() {
    return this.entries.length <= 2;
  }
}
|
package/src/engine.d.ts
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
 * Parse word-list text: one entry per line, trimmed and upper-cased; blank lines and lines starting with '#' are skipped.
 * @param {string} text
 * @returns {string[]}
 */
export function parseWordList(text: string): string[];
|
|
7
|
+
/**
 * Parse swap-table text: each non-blank, non-'#' line with at least two whitespace-separated fields yields an upper-cased pair of its first two fields.
 * @param {string} text
 * @returns {[string, string][]}
 */
export function parseSwapFile(text: string): [string, string][];
|
|
13
|
+
/**
 * Node-only: read a UTF-8 file and parse it with parseWordList. Rejects when `window` is defined or `process` is not.
 * @param {string} path
 * @returns {Promise<string[]>}
 */
export function loadWordList(path: string): Promise<string[]>;
|
|
19
|
+
/**
 * Node-only: read a UTF-8 file and parse it with parseSwapFile. Rejects when `window` is defined or `process` is not.
 * @param {string} path
 * @returns {Promise<[string, string][]>}
 */
export function loadSwapFile(path: string): Promise<[string, string][]>;
|
|
25
|
+
/**
 * MegaHAL engine: owns the bidirectional model plus the keyword, greeting, limit and fallback settings used to produce replies.
 */
export class MegaHal {
    /**
     * @param {number} [order=5] - Order passed to the underlying BidirectionalModel
     * @param {any} [rng=null] - Optional random source; greet() uses its randomRange(min, max) method when present
     */
    constructor(order?: number, rng?: any);
    model: BidirectionalModel;
    rng: any;
    keywordConfig: KeywordConfig;
    /** @type {string[]} Greeting keywords, stored upper-cased by setGreetings(). */
    greetings: string[];
    limit: {
        timeout: number;
        maxIterations: number;
    };
    fallbackReply: string;
    fallbackGreeting: string;
    /**
     * Replace the text respond() returns when no reply could be generated.
     * @param {string} msg
     */
    setFallbackReply(msg: string): void;
    /**
     * Replace the text greet() returns when no greeting could be generated.
     * @param {string} msg
     */
    setFallbackGreeting(msg: string): void;
    /**
     * Merge the given values into the current reply generation limits.
     * @param {{ timeout?: number, maxIterations?: number }} limit
     */
    setLimit(limit: {
        timeout?: number;
        maxIterations?: number;
    }): void;
    /**
     * Replace the keyword configuration (banned words, auxiliary words, and swap table).
     * @param {KeywordConfig} config
     */
    setKeywordConfig(config: KeywordConfig): void;
    /**
     * Replace the greeting keywords; each one is stored upper-cased.
     * @param {string[]} greetings
     */
    setGreetings(greetings: string[]): void;
    /**
     * Tokenize the input and feed it to the model; no reply is produced.
     * @param {string} input
     */
    learn(input: string): void;
    /**
     * Learn from the input, then generate a reply to it; falls back to fallbackReply when generation yields nothing.
     * @param {string} input
     * @returns {string}
     */
    respond(input: string): string;
    /**
     * Generate a reply to the input without learning from it.
     * Yields null when generation produces no tokens.
     * @param {string} input
     * @returns {string|null}
     */
    generate(input: string): string | null;
    /**
     * Generate an opening line seeded by a randomly chosen greeting keyword; falls back to fallbackGreeting.
     * @returns {string}
     */
    greet(): string;
    /**
     * Serialize the model state to a binary Uint8Array.
     * Works in both Node and Browser.
     * @param {{ use64Bit?: boolean }} [options] - Options forwarded to the serializer
     * @returns {Uint8Array}
     */
    exportBrain(options?: {
        use64Bit?: boolean;
    }): Uint8Array;
    /**
     * Load binary brain data into this engine's model.
     * Works in both Node and Browser.
     * @param {Uint8Array|ArrayBuffer} data
     */
    importBrain(data: Uint8Array | ArrayBuffer): void;
    /**
     * Learn every line of the text, skipping blank lines and lines starting with '#'.
     * @param {string} content
     */
    trainFromContent(content: string): void;
    /**
     * Node-only: write the exported brain to a file.
     * @param {string} path
     * @param {{ use64Bit?: boolean }} [options] - Options forwarded to exportBrain()
     * @returns {Promise<void>}
     */
    saveBrain(path: string, options?: {
        use64Bit?: boolean;
    }): Promise<void>;
    /**
     * Node-only: read a brain file and import it into the model.
     * @param {string} path
     * @returns {Promise<void>}
     */
    loadBrain(path: string): Promise<void>;
    /**
     * Node-only: read a UTF-8 text file and train on its contents via trainFromContent().
     * @param {string} path
     * @returns {Promise<void>}
     */
    trainFromFile(path: string): Promise<void>;
}
|
|
138
|
+
import { BidirectionalModel } from './model.js';
|
|
139
|
+
import { KeywordConfig } from './keywords.js';
|
|
140
|
+
//# sourceMappingURL=engine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["engine.js"],"names":[],"mappings":"AA+PA;;;;GAIG;AACH,oCAHW,MAAM,GACJ,MAAM,EAAE,CAQpB;AAED;;;;GAIG;AACH,oCAHW,MAAM,GACJ,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAiB9B;AAED;;;;GAIG;AACH,mCAHW,MAAM,GACJ,OAAO,CAAC,MAAM,EAAE,CAAC,CAS7B;AAED;;;;GAIG;AACH,mCAHW,MAAM,GACJ,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC,CASvC;AArSD;;GAEG;AACH;IACE;;;OAGG;IACH,oBAHW,MAAM,QACN,GAAG,EAWb;IARC,0BAA0C;IAC1C,SAAc;IACd,6BAAwC;IACxC,uBAAuB;IACvB,WADW,MAAM,EAAE,CACA;IACnB;;;MAAgD;IAChD,sBAA2C;IAC3C,yBAAiD;IAGnD;;;OAGG;IACH,sBAFW,MAAM,QAIhB;IAED;;;OAGG;IACH,yBAFW,MAAM,QAIhB;IAED;;;OAGG;IACH,gBAFW;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,QAItD;IAED;;;OAGG;IACH,yBAFW,aAAa,QAIvB;IAED;;;OAGG;IACH,wBAFW,MAAM,EAAE,QAIlB;IAED;;;OAGG;IACH,aAFW,MAAM,QAKhB;IAED;;;;OAIG;IACH,eAHW,MAAM,GACJ,MAAM,CAwBlB;IAED;;;;;OAKG;IACH,gBAHW,MAAM,GACJ,MAAM,GAAC,IAAI,CAqBvB;IAED;;;OAGG;IACH,SAFa,MAAM,CA0BlB;IAED;;;;;OAKG;IACH,sBAHW;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;KAAE,GACpB,UAAU,CAItB;IAED;;;;OAIG;IACH,kBAFW,UAAU,GAAC,WAAW,QAIhC;IAED;;;OAGG;IACH,0BAFW,MAAM,QAWhB;IAED;;;;;OAKG;IACH,gBAJW,MAAM,YACN;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;KAAE,GACpB,OAAO,CAAC,IAAI,CAAC,CASzB;IAED;;;;OAIG;IACH,gBAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CASzB;IAED;;;;OAIG;IACH,oBAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CASzB;CACF;mCA7PkC,YAAY;8BAEA,eAAe"}
|
package/src/engine.js
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import { BidirectionalModel } from './model.js';
|
|
2
|
+
import { tokenize } from './tokenizer.js';
|
|
3
|
+
import { extractKeywords, KeywordConfig } from './keywords.js';
|
|
4
|
+
import { generateReply, capitalize } from './generator.js';
|
|
5
|
+
import { serializeBrain, deserializeBrain } from './binary.js';
|
|
6
|
+
|
|
7
|
+
/** Initial value of MegaHal#fallbackReply, returned by respond() when no reply tokens are generated. */
const DEFAULT_FALLBACK_REPLY = "I don't know enough to answer you yet!";

/** Initial value of MegaHal#fallbackGreeting, returned by greet() when no greeting can be generated. */
const DEFAULT_FALLBACK_GREETING = 'Hello!';
|
|
9
|
+
|
|
10
|
+
/**
 * Pick a number between `min` and `max`.
 *
 * A custom generator is used when it exposes a `randomRange(min, max)`
 * function; its result is returned as-is. Otherwise an integer in
 * `[min, max)` is derived from Math.random().
 * @param {any} rng - Optional custom generator (may be null/undefined)
 * @param {number} min
 * @param {number} max
 * @returns {number}
 */
function randomRange(rng, min, max) {
  const hasCustomSource = Boolean(rng) && typeof rng.randomRange === 'function';
  if (!hasCustomSource) {
    const span = max - min;
    return Math.floor(Math.random() * span) + min;
  }
  return rng.randomRange(min, max);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Throw unless the code appears to be running under Node.js, i.e. there is no
 * global `window` and a global `process` exists.
 * @param {string} apiName - Name of the calling API, used in the error message
 */
function assertNodeEnvironment(apiName) {
  if (typeof window !== 'undefined' || typeof process === 'undefined') {
    throw new Error(`${apiName} is only supported in Node.js environment`);
  }
}

/**
 * Run the reply generator with an engine's model, auxiliary keywords, limits
 * and random source.
 * @param {MegaHal} engine
 * @param {string[]} tokens - Tokens of the input being replied to (may be empty)
 * @param {Set<string>} keywords - Keywords the reply should be built around
 * @returns {string[]} Generated reply tokens; empty when nothing was generated
 */
function generateReplyTokens(engine, tokens, keywords) {
  return generateReply(
    engine.model,
    tokens,
    keywords,
    engine.keywordConfig.auxiliary,
    engine.limit,
    engine.rng
  );
}

/**
 * Main MegaHAL engine class.
 *
 * Owns the bidirectional model together with the keyword configuration,
 * greeting keywords, generation limits and fallback messages used when
 * producing replies.
 */
export class MegaHal {
  /**
   * @param {number} [order=5] - Markov model order
   * @param {any} [rng=null] - Optional custom random number generator
   */
  constructor(order = 5, rng = null) {
    this.model = new BidirectionalModel(order);
    this.rng = rng;
    this.keywordConfig = new KeywordConfig();
    /** @type {string[]} */
    this.greetings = [];
    this.limit = { timeout: 1000, maxIterations: 0 };
    this.fallbackReply = DEFAULT_FALLBACK_REPLY;
    this.fallbackGreeting = DEFAULT_FALLBACK_GREETING;
  }

  /**
   * Override fallback message when respond() cannot produce output.
   * @param {string} msg
   */
  setFallbackReply(msg) {
    this.fallbackReply = msg;
  }

  /**
   * Override fallback greeting when greet() cannot produce output.
   * @param {string} msg
   */
  setFallbackGreeting(msg) {
    this.fallbackGreeting = msg;
  }

  /**
   * Set reply generation limits.
   *
   * The given values are merged into the current limits. Properties that are
   * absent or explicitly `undefined` leave the current value untouched, so
   * `limit.timeout` and `limit.maxIterations` always stay defined.
   * @param {{ timeout?: number, maxIterations?: number }} limit
   */
  setLimit(limit) {
    const merged = { ...this.limit };
    for (const [key, value] of Object.entries(limit ?? {})) {
      if (value !== undefined) {
        merged[key] = value;
      }
    }
    this.limit = merged;
  }

  /**
   * Set keyword configuration (banned words, auxiliary words, and swap table).
   * @param {KeywordConfig} config
   */
  setKeywordConfig(config) {
    this.keywordConfig = config;
  }

  /**
   * Set greeting keywords. Each keyword is stored upper-cased.
   * @param {string[]} greetings
   */
  setGreetings(greetings) {
    this.greetings = greetings.map(g => g.toUpperCase());
  }

  /**
   * Learn from input text without generating a reply.
   * @param {string} input
   */
  learn(input) {
    this.model.learn(tokenize(input));
  }

  /**
   * Learn from input and generate a reply.
   * @param {string} input
   * @returns {string} The generated reply, or the fallback reply when no
   *   tokens could be generated.
   */
  respond(input) {
    const tokens = tokenize(input);

    // Learn from the input first, so keyword extraction and generation both
    // see the updated model.
    this.model.learn(tokens);

    const keywords = extractKeywords(tokens, this.model.dictionary, this.keywordConfig);
    const replyTokens = generateReplyTokens(this, tokens, keywords);

    return replyTokens.length === 0 ? this.fallbackReply : capitalize(replyTokens);
  }

  /**
   * Generate a reply without learning from the input.
   * Returns null if no reply can be generated.
   * @param {string} input
   * @returns {string|null}
   */
  generate(input) {
    const tokens = tokenize(input);
    const keywords = extractKeywords(tokens, this.model.dictionary, this.keywordConfig);
    const replyTokens = generateReplyTokens(this, tokens, keywords);

    return replyTokens.length === 0 ? null : capitalize(replyTokens);
  }

  /**
   * Generate an initial greeting before user input.
   *
   * One of the configured greeting keywords is picked at random and used as
   * the only keyword for generation. The fallback greeting is returned when
   * no greetings are configured or nothing could be generated.
   * @returns {string}
   */
  greet() {
    if (this.greetings.length === 0) {
      return this.fallbackGreeting;
    }

    const idx = randomRange(this.rng, 0, this.greetings.length);
    const keywords = new Set([this.greetings[idx]]);

    // There is no user input yet, hence the empty token list.
    const replyTokens = generateReplyTokens(this, [], keywords);

    return replyTokens.length === 0 ? this.fallbackGreeting : capitalize(replyTokens);
  }

  /**
   * Export the model state as a binary Uint8Array.
   * Works in both Node and Browser.
   * @param {{ use64Bit?: boolean }} [options] - Options for serialization
   * @returns {Uint8Array}
   */
  exportBrain(options) {
    return serializeBrain(this.model, options);
  }

  /**
   * Import the model state from binary brain data.
   * Works in both Node and Browser.
   * @param {Uint8Array|ArrayBuffer} data
   */
  importBrain(data) {
    deserializeBrain(data, this.model);
  }

  /**
   * Train from a text string containing multiple lines of sentences.
   * Lines are trimmed; blank lines and lines starting with '#' are skipped.
   * @param {string} content
   */
  trainFromContent(content) {
    for (const line of content.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (trimmed === '' || trimmed.startsWith('#')) {
        continue;
      }
      this.learn(trimmed);
    }
  }

  /**
   * Node-only: Save the model to a binary brain file.
   * @param {string} path
   * @param {{ use64Bit?: boolean }} [options] - Options for serialization
   * @returns {Promise<void>} Rejects with an Error outside Node.js.
   */
  async saveBrain(path, options) {
    assertNodeEnvironment('saveBrain');
    const fs = await import('node:fs/promises');
    await fs.writeFile(path, this.exportBrain(options));
  }

  /**
   * Node-only: Load the model from a binary brain file.
   * @param {string} path
   * @returns {Promise<void>} Rejects with an Error outside Node.js.
   */
  async loadBrain(path) {
    assertNodeEnvironment('loadBrain');
    const fs = await import('node:fs/promises');
    this.importBrain(await fs.readFile(path));
  }

  /**
   * Node-only: Train from a text file (read as UTF-8).
   * @param {string} path
   * @returns {Promise<void>} Rejects with an Error outside Node.js.
   */
  async trainFromFile(path) {
    assertNodeEnvironment('trainFromFile');
    const fs = await import('node:fs/promises');
    this.trainFromContent(await fs.readFile(path, 'utf8'));
  }
}
|
|
255
|
+
|
|
256
|
+
/**
 * Universal helper to parse a plain word list.
 *
 * Every line is trimmed; blank lines and lines starting with '#' are dropped,
 * and the remaining entries are returned upper-cased in file order.
 * @param {string} text
 * @returns {string[]}
 */
export function parseWordList(text) {
  /** @type {string[]} */
  const words = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#')) {
      continue;
    }
    words.push(entry.toUpperCase());
  }
  return words;
}
|
|
268
|
+
|
|
269
|
+
/**
 * Universal helper to parse a swap table file content.
 *
 * Blank lines and lines starting with '#' are ignored. Every other line is
 * split on whitespace; when it has at least two fields, the first two are
 * upper-cased and emitted as a pair. Extra fields and single-field lines are
 * ignored.
 * @param {string} text
 * @returns {[string, string][]}
 */
export function parseSwapFile(text) {
  /** @type {[string, string][]} */
  const swaps = [];
  const entries = text
    .split(/\r?\n/)
    .map(rawLine => rawLine.trim())
    .filter(entry => entry !== '' && !entry.startsWith('#'));

  for (const entry of entries) {
    const [from, to] = entry.split(/\s+/);
    // A missing second field means the line had fewer than two fields.
    if (to !== undefined) {
      swaps.push([from.toUpperCase(), to.toUpperCase()]);
    }
  }
  return swaps;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Node-only helper to load a plain word list from a file.
 *
 * The file is read as UTF-8 and handed to parseWordList().
 * @param {string} path
 * @returns {Promise<string[]>} Rejects with an Error when a global `window`
 *   exists or no global `process` exists.
 */
export async function loadWordList(path) {
  const runningInNode = typeof window === 'undefined' && typeof process !== 'undefined';
  if (!runningInNode) {
    throw new Error('loadWordList is only supported in Node.js environment');
  }
  const { readFile } = await import('node:fs/promises');
  return parseWordList(await readFile(path, 'utf8'));
}
|
|
304
|
+
|
|
305
|
+
/**
 * Node-only helper to load a swap table from a file.
 *
 * The file is read as UTF-8 and handed to parseSwapFile().
 * @param {string} path
 * @returns {Promise<[string, string][]>} Rejects with an Error when a global
 *   `window` exists or no global `process` exists.
 */
export async function loadSwapFile(path) {
  const runningInNode = typeof window === 'undefined' && typeof process !== 'undefined';
  if (!runningInNode) {
    throw new Error('loadSwapFile is only supported in Node.js environment');
  }
  const { readFile } = await import('node:fs/promises');
  return parseSwapFile(await readFile(path, 'utf8'));
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Compute a score for a candidate reply from the surprise (Shannon entropy) of the keywords it contains, in context.
 *
 * @param {import('./model.js').BidirectionalModel} model
 * @param {string[]} candidate - The candidate reply, as a list of tokens
 * @param {Set<string>} keywords - Uppercase keywords to score against
 * @returns {number}
 */
export function evaluateReply(model: import("./model.js").BidirectionalModel, candidate: string[], keywords: Set<string>): number;
|
|
10
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["evaluator.js"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,qCALW,OAAO,YAAY,EAAE,kBAAkB,aACvC,MAAM,EAAE,YACR,GAAG,CAAC,MAAM,CAAC,GACT,MAAM,CA8FlB"}
|