@sharpee/parser-en-us 0.9.60-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +123 -0
- package/dist/direction-mappings.d.ts +24 -0
- package/dist/direction-mappings.d.ts.map +1 -0
- package/dist/direction-mappings.js +82 -0
- package/dist/direction-mappings.js.map +1 -0
- package/dist/english-grammar-engine.d.ts +85 -0
- package/dist/english-grammar-engine.d.ts.map +1 -0
- package/dist/english-grammar-engine.js +562 -0
- package/dist/english-grammar-engine.js.map +1 -0
- package/dist/english-parser.d.ts +184 -0
- package/dist/english-parser.d.ts.map +1 -0
- package/dist/english-parser.js +1268 -0
- package/dist/english-parser.js.map +1 -0
- package/dist/english-pattern-compiler.d.ts +29 -0
- package/dist/english-pattern-compiler.d.ts.map +1 -0
- package/dist/english-pattern-compiler.js +211 -0
- package/dist/english-pattern-compiler.js.map +1 -0
- package/dist/grammar.d.ts +19 -0
- package/dist/grammar.d.ts.map +1 -0
- package/dist/grammar.js +620 -0
- package/dist/grammar.js.map +1 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +44 -0
- package/dist/index.js.map +1 -0
- package/dist/parse-failure.d.ts +59 -0
- package/dist/parse-failure.d.ts.map +1 -0
- package/dist/parse-failure.js +132 -0
- package/dist/parse-failure.js.map +1 -0
- package/dist/parser-types.d.ts +185 -0
- package/dist/parser-types.d.ts.map +1 -0
- package/dist/parser-types.js +134 -0
- package/dist/parser-types.js.map +1 -0
- package/dist/pronoun-context.d.ts +119 -0
- package/dist/pronoun-context.d.ts.map +1 -0
- package/dist/pronoun-context.js +249 -0
- package/dist/pronoun-context.js.map +1 -0
- package/dist/scope-evaluator.d.ts +58 -0
- package/dist/scope-evaluator.d.ts.map +1 -0
- package/dist/scope-evaluator.js +205 -0
- package/dist/scope-evaluator.js.map +1 -0
- package/dist/slot-consumers/entity-slot-consumer.d.ts +36 -0
- package/dist/slot-consumers/entity-slot-consumer.d.ts.map +1 -0
- package/dist/slot-consumers/entity-slot-consumer.js +413 -0
- package/dist/slot-consumers/entity-slot-consumer.js.map +1 -0
- package/dist/slot-consumers/index.d.ts +43 -0
- package/dist/slot-consumers/index.d.ts.map +1 -0
- package/dist/slot-consumers/index.js +78 -0
- package/dist/slot-consumers/index.js.map +1 -0
- package/dist/slot-consumers/slot-consumer.d.ts +61 -0
- package/dist/slot-consumers/slot-consumer.d.ts.map +1 -0
- package/dist/slot-consumers/slot-consumer.js +31 -0
- package/dist/slot-consumers/slot-consumer.js.map +1 -0
- package/dist/slot-consumers/text-slot-consumer.d.ts +33 -0
- package/dist/slot-consumers/text-slot-consumer.d.ts.map +1 -0
- package/dist/slot-consumers/text-slot-consumer.js +157 -0
- package/dist/slot-consumers/text-slot-consumer.js.map +1 -0
- package/dist/slot-consumers/typed-slot-consumer.d.ts +35 -0
- package/dist/slot-consumers/typed-slot-consumer.d.ts.map +1 -0
- package/dist/slot-consumers/typed-slot-consumer.js +151 -0
- package/dist/slot-consumers/typed-slot-consumer.js.map +1 -0
- package/dist/slot-consumers/vocabulary-slot-consumer.d.ts +42 -0
- package/dist/slot-consumers/vocabulary-slot-consumer.d.ts.map +1 -0
- package/dist/slot-consumers/vocabulary-slot-consumer.js +186 -0
- package/dist/slot-consumers/vocabulary-slot-consumer.js.map +1 -0
- package/package.json +65 -0
|
@@ -0,0 +1,1268 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* English Parser Implementation
|
|
4
|
+
*
|
|
5
|
+
* This parser handles English-specific grammar patterns and preserves all information
|
|
6
|
+
* in rich structured commands.
|
|
7
|
+
*/
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.EnglishParser = void 0;
|
|
10
|
+
const if_domain_1 = require("@sharpee/if-domain");
|
|
11
|
+
const world_model_1 = require("@sharpee/world-model");
|
|
12
|
+
const english_grammar_engine_1 = require("./english-grammar-engine");
|
|
13
|
+
const grammar_1 = require("./grammar");
|
|
14
|
+
const direction_mappings_1 = require("./direction-mappings");
|
|
15
|
+
const parse_failure_1 = require("./parse-failure");
|
|
16
|
+
const pronoun_context_1 = require("./pronoun-context");
|
|
17
|
+
/**
 * Default parser options
 *
 * Spread as the base layer in the constructor ({ ...DEFAULT_OPTIONS, ...options }),
 * so any option the caller supplies overrides the value listed here.
 */
const DEFAULT_OPTIONS = {
    allowPartial: true,
    expandAbbreviations: true,
    ignoreArticles: false, // We preserve everything in the English parser
    minConfidence: 0.1
};
|
|
26
|
+
/**
 * Map vocabulary part of speech to world model part of speech
 *
 * Any value without an entry in the table (including undefined) falls back
 * to UNKNOWN. SPECIAL is deliberately mapped to DETERMINER: the stdlib has
 * no DETERMINER part of speech, so special words stand in for it.
 */
function mapPartOfSpeech(vocabPos) {
    const posTable = new Map([
        [if_domain_1.PartOfSpeech.VERB, world_model_1.PartOfSpeech.VERB],
        [if_domain_1.PartOfSpeech.NOUN, world_model_1.PartOfSpeech.NOUN],
        [if_domain_1.PartOfSpeech.ADJECTIVE, world_model_1.PartOfSpeech.ADJECTIVE],
        [if_domain_1.PartOfSpeech.ARTICLE, world_model_1.PartOfSpeech.ARTICLE],
        [if_domain_1.PartOfSpeech.PREPOSITION, world_model_1.PartOfSpeech.PREPOSITION],
        [if_domain_1.PartOfSpeech.PRONOUN, world_model_1.PartOfSpeech.PRONOUN],
        // Note: stdlib doesn't have DETERMINER, but we map special words to DETERMINER
        [if_domain_1.PartOfSpeech.SPECIAL, world_model_1.PartOfSpeech.DETERMINER]
    ]);
    const mapped = posTable.get(vocabPos);
    return mapped !== undefined ? mapped : world_model_1.PartOfSpeech.UNKNOWN;
}
|
|
51
|
+
/**
|
|
52
|
+
* English parser with rich information preservation
|
|
53
|
+
*/
|
|
54
|
+
class EnglishParser {
|
|
55
|
+
options;
|
|
56
|
+
language;
|
|
57
|
+
onDebugEvent;
|
|
58
|
+
platformEventEmitter;
|
|
59
|
+
grammarEngine;
|
|
60
|
+
worldContext = null;
|
|
61
|
+
/** Pronoun context manager for "it", "them", "him", "her" resolution (ADR-089) */
|
|
62
|
+
pronounContext;
|
|
63
|
+
/**
 * @param language - language provider supplying verbs, directions,
 *   prepositions, determiners, conjunctions and numbers
 * @param options - partial parser options; merged over DEFAULT_OPTIONS
 */
constructor(language, options = {}) {
    this.language = language;
    // Caller-supplied options override DEFAULT_OPTIONS key by key.
    this.options = { ...DEFAULT_OPTIONS, ...options };
    // Initialize grammar engine
    this.grammarEngine = new english_grammar_engine_1.EnglishGrammarEngine();
    const grammar = this.grammarEngine.createBuilder();
    // Load the English grammar rules into the engine through its builder.
    (0, grammar_1.defineGrammar)(grammar);
    // Initialize pronoun context (ADR-089)
    this.pronounContext = new pronoun_context_1.PronounContextManager();
    // Register the manager with the pronoun-context module so other code
    // can resolve "it"/"them"/"him"/"her" against it.
    (0, pronoun_context_1.setPronounContextManager)(this.pronounContext);
    this.initializeVocabulary();
}
|
|
75
|
+
/**
|
|
76
|
+
* Initialize vocabulary from language provider
|
|
77
|
+
*/
|
|
78
|
+
initializeVocabulary() {
|
|
79
|
+
// Clear any existing vocabulary
|
|
80
|
+
if_domain_1.vocabularyRegistry.clear();
|
|
81
|
+
// Register verbs (adapting from language provider format)
|
|
82
|
+
const verbs = (0, if_domain_1.adaptVerbVocabulary)(this.language);
|
|
83
|
+
if_domain_1.vocabularyRegistry.registerVerbs(verbs);
|
|
84
|
+
// Register directions (adapting from language provider format)
|
|
85
|
+
const directions = (0, if_domain_1.adaptDirectionVocabulary)(this.language);
|
|
86
|
+
if_domain_1.vocabularyRegistry.registerDirections(directions);
|
|
87
|
+
// Register special vocabulary (adapting from language provider format)
|
|
88
|
+
const special = (0, if_domain_1.adaptSpecialVocabulary)(this.language);
|
|
89
|
+
if_domain_1.vocabularyRegistry.registerSpecial(special);
|
|
90
|
+
// Register prepositions
|
|
91
|
+
const prepositions = this.language.getPrepositions();
|
|
92
|
+
if_domain_1.vocabularyRegistry.registerPrepositions(prepositions);
|
|
93
|
+
// Register determiners
|
|
94
|
+
const determiners = this.language.getDeterminers();
|
|
95
|
+
if_domain_1.vocabularyRegistry.registerDeterminers(determiners);
|
|
96
|
+
// Register conjunctions
|
|
97
|
+
const conjunctions = this.language.getConjunctions();
|
|
98
|
+
if_domain_1.vocabularyRegistry.registerConjunctions(conjunctions);
|
|
99
|
+
// Register numbers
|
|
100
|
+
const numbers = this.language.getNumbers();
|
|
101
|
+
if_domain_1.vocabularyRegistry.registerNumbers(numbers);
|
|
102
|
+
}
|
|
103
|
+
/**
 * Set debug event callback
 *
 * @param callback - invoked with structured debug events during parse()
 *   (types: tokenize, pattern_match, parse_error, candidate_selection);
 *   pass undefined to disable debug events
 */
setDebugCallback(callback) {
    this.onDebugEvent = callback;
}
|
|
109
|
+
/**
 * Set platform event emitter for parser debugging
 *
 * @param emitter - receives `platform.parser.*` events built by
 *   emitPlatformEvent(); pass undefined to disable platform events
 */
setPlatformEventEmitter(emitter) {
    this.platformEventEmitter = emitter;
}
|
|
115
|
+
/**
 * Set the world context for scope constraint evaluation
 *
 * Until this is called, findCommandStructures() falls back to a null world,
 * actor 'player' and location 'current'.
 *
 * @param world - world model consulted by the grammar engine
 * @param actorId - id of the acting entity
 * @param currentLocation - id of the actor's current location
 */
setWorldContext(world, actorId, currentLocation) {
    this.worldContext = { world, actorId, currentLocation };
}
|
|
121
|
+
/**
|
|
122
|
+
* Emit a platform debug event
|
|
123
|
+
*/
|
|
124
|
+
emitPlatformEvent(type, data) {
|
|
125
|
+
if (this.platformEventEmitter) {
|
|
126
|
+
this.platformEventEmitter({
|
|
127
|
+
id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
128
|
+
timestamp: Date.now(),
|
|
129
|
+
type: `platform.parser.${type}`,
|
|
130
|
+
entities: {},
|
|
131
|
+
payload: {
|
|
132
|
+
subsystem: 'parser',
|
|
133
|
+
...data
|
|
134
|
+
},
|
|
135
|
+
tags: ['platform', 'parser', 'debug'],
|
|
136
|
+
priority: 0,
|
|
137
|
+
narrate: false
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
/**
 * Register additional verbs after parser creation
 * Used for story-specific vocabulary
 *
 * @param verbs - verb definitions ({ actionId, verbs, ... }) forwarded to the
 *   shared vocabulary registry under the 'story' source tag
 */
registerVerbs(verbs) {
    // Register with vocabulary registry
    if_domain_1.vocabularyRegistry.registerDynamicVerbs(verbs, 'story');
}
|
|
149
|
+
/**
|
|
150
|
+
* Register additional vocabulary entries
|
|
151
|
+
* More generic than registerVerbs - can handle any part of speech
|
|
152
|
+
*/
|
|
153
|
+
registerVocabulary(entries) {
|
|
154
|
+
// Group by part of speech and register appropriately
|
|
155
|
+
const verbs = [];
|
|
156
|
+
for (const entry of entries) {
|
|
157
|
+
if (entry.partOfSpeech === if_domain_1.PartOfSpeech.VERB) {
|
|
158
|
+
// Find or create verb definition for this action
|
|
159
|
+
let verbDef = verbs.find(v => v.actionId === entry.mapsTo);
|
|
160
|
+
if (!verbDef) {
|
|
161
|
+
verbDef = {
|
|
162
|
+
actionId: entry.mapsTo,
|
|
163
|
+
verbs: [],
|
|
164
|
+
pattern: entry.metadata?.pattern,
|
|
165
|
+
prepositions: entry.metadata?.prepositions
|
|
166
|
+
};
|
|
167
|
+
verbs.push(verbDef);
|
|
168
|
+
}
|
|
169
|
+
verbDef.verbs.push(entry.word);
|
|
170
|
+
}
|
|
171
|
+
// Future: handle other parts of speech
|
|
172
|
+
}
|
|
173
|
+
if (verbs.length > 0) {
|
|
174
|
+
this.registerVerbs(verbs);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
/**
 * Parse input text into structured command with rich information
 *
 * Pipeline: tokenize -> grammar pattern matching -> pick the
 * highest-confidence candidate -> assemble a ParsedCommand. Platform events
 * are emitted at every stage, and debug events too when a debug callback
 * has been registered via setDebugCallback().
 *
 * @param input - raw player input for a single command
 * @returns { success: true, value: ParsedCommand } on a match, otherwise
 *   { success: false, error } built from the grammar engine's failure analysis
 */
parse(input) {
    // Emit parse start event
    this.emitPlatformEvent('parse_start', { input });
    // Tokenize with full position tracking
    const tokens = this.tokenizeRich(input);
    // Emit tokenize platform event
    this.emitPlatformEvent('tokenize_complete', {
        input,
        tokens: tokens.map(t => ({
            word: t.word,
            normalized: t.normalized,
            partOfSpeech: t.partOfSpeech,
            candidateCount: t.candidates.length
        }))
    });
    // Emit tokenize debug event
    if (this.onDebugEvent) {
        // Find unknown words (no part of speech resolved, or no vocabulary candidates)
        const unknownWords = tokens
            .filter(t => t.partOfSpeech.includes(world_model_1.PartOfSpeech.UNKNOWN) || t.candidates.length === 0)
            .map(t => t.word);
        this.onDebugEvent({
            id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
            timestamp: Date.now(),
            subsystem: 'parser',
            type: 'tokenize',
            data: {
                input,
                tokens: tokens.map(t => ({
                    word: t.word,
                    normalized: t.normalized,
                    position: t.position,
                    length: t.length,
                    partOfSpeech: t.partOfSpeech,
                    candidateCount: t.candidates.length
                })),
                unknownWords
            }
        });
    }
    // Try to find command structure
    const candidates = this.findCommandStructures(tokens, input);
    // Emit pattern matching platform event
    this.emitPlatformEvent('pattern_matching_complete', {
        input,
        candidateCount: candidates.length,
        patterns: candidates.map(c => ({
            pattern: c.pattern,
            action: c.action,
            confidence: c.confidence
        }))
    });
    // Emit pattern match debug event
    if (this.onDebugEvent) {
        this.onDebugEvent({
            id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
            timestamp: Date.now(),
            subsystem: 'parser',
            type: 'pattern_match',
            data: {
                input,
                patternsAttempted: candidates.map(c => ({
                    name: c.pattern,
                    matched: true,
                    confidence: c.confidence
                })),
                totalCandidates: candidates.length
            }
        });
    }
    // No candidate matched: produce a structured parse error.
    if (candidates.length === 0) {
        // Analyze failures to determine the best error message
        const failures = this.grammarEngine.getLastFailures();
        const hadVerb = tokens.some(t => t.partOfSpeech.includes(world_model_1.PartOfSpeech.VERB));
        const errorAnalysis = (0, parse_failure_1.analyzeBestFailure)(failures, input, hadVerb);
        // Emit parse error platform event
        this.emitPlatformEvent('parse_failed', {
            input,
            reason: 'no_matching_patterns',
            tokenCount: tokens.length,
            hadVerb,
            errorCode: errorAnalysis.code,
            errorContext: errorAnalysis.context
        });
        // Emit parse error debug event
        if (this.onDebugEvent) {
            this.onDebugEvent({
                id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
                timestamp: Date.now(),
                subsystem: 'parser',
                type: 'parse_error',
                data: {
                    input,
                    errorType: errorAnalysis.code,
                    errorDetails: {
                        messageId: errorAnalysis.messageId,
                        context: errorAnalysis.context,
                        hadTokens: tokens.length > 0,
                        hadVerb,
                        failureCount: failures.length
                    }
                }
            });
        }
        // Build the detailed error message
        const error = this.buildParseError(input, errorAnalysis);
        return {
            success: false,
            error
        };
    }
    // Sort by confidence (descending) and take the best
    candidates.sort((a, b) => b.confidence - a.confidence);
    const best = candidates[0];
    // Emit candidate selection debug event
    if (this.onDebugEvent) {
        this.onDebugEvent({
            id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
            timestamp: Date.now(),
            subsystem: 'parser',
            type: 'candidate_selection',
            data: {
                input,
                candidates: candidates.map((c, i) => ({
                    action: c.action,
                    pattern: c.pattern,
                    confidence: c.confidence,
                    selected: i === 0
                })),
                selectionReason: candidates.length === 1 ? 'only_candidate' : 'highest_confidence'
            }
        });
    }
    // Build the ParsedCommand
    const parsed = {
        rawInput: input,
        tokens: best.tokens,
        structure: {
            verb: best.verb,
            directObject: best.directObject,
            preposition: best.preposition,
            indirectObject: best.indirectObject
        },
        pattern: best.pattern,
        confidence: best.confidence,
        action: best.action,
        // ADR-080 additions
        textSlots: best.textSlots,
        instrument: best.instrument,
        excluded: best.excluded,
        // ADR-082 additions
        vocabularySlots: best.vocabularySlots,
        manner: best.manner
    };
    // Add extras if present
    if (best.direction) {
        // Convert direction string to Direction constant; fall back to the
        // raw string when no constant is known for it.
        const directionConstant = (0, direction_mappings_1.parseDirection)(best.direction);
        parsed.extras = {
            direction: directionConstant || best.direction
        };
    }
    else if (best.extras) {
        // Check if extras contains a direction and convert it
        const extras = { ...best.extras };
        if (extras.direction && typeof extras.direction === 'string') {
            const directionConstant = (0, direction_mappings_1.parseDirection)(extras.direction);
            if (directionConstant) {
                extras.direction = directionConstant;
            }
        }
        parsed.extras = extras;
    }
    // Emit parse success platform event
    this.emitPlatformEvent('parse_success', {
        input,
        action: parsed.action,
        pattern: parsed.pattern,
        confidence: parsed.confidence,
        structure: {
            verb: parsed.structure.verb?.text,
            directObject: parsed.structure.directObject?.text,
            preposition: parsed.structure.preposition?.text,
            indirectObject: parsed.structure.indirectObject?.text
        }
    });
    return {
        success: true,
        value: parsed
    };
}
|
|
371
|
+
/**
|
|
372
|
+
* Parse input that may contain multiple commands separated by periods or commas.
|
|
373
|
+
* Returns an array of parsed commands (or errors).
|
|
374
|
+
*
|
|
375
|
+
* Period chaining:
|
|
376
|
+
* - "take sword. go north." → [take sword, go north]
|
|
377
|
+
*
|
|
378
|
+
* Comma chaining (only when verb detected after comma):
|
|
379
|
+
* - "take sword, drop it" → [take sword, drop it] (verb after comma)
|
|
380
|
+
* - "take knife, lamp" → single command with list (no verb after comma)
|
|
381
|
+
*
|
|
382
|
+
* Examples:
|
|
383
|
+
* - "take sword. go north." → [take sword, go north]
|
|
384
|
+
* - "take sword" → [take sword]
|
|
385
|
+
* - "take sword. invalid. go north" → [take sword, error, go north]
|
|
386
|
+
*/
|
|
387
|
+
parseChain(input) {
|
|
388
|
+
// First split on periods
|
|
389
|
+
const periodSegments = this.splitOnPeriods(input);
|
|
390
|
+
// Then handle comma disambiguation within each segment
|
|
391
|
+
const allSegments = [];
|
|
392
|
+
for (const segment of periodSegments) {
|
|
393
|
+
const commaSegments = this.splitOnCommasIfChain(segment);
|
|
394
|
+
allSegments.push(...commaSegments);
|
|
395
|
+
}
|
|
396
|
+
// Parse each segment
|
|
397
|
+
return allSegments.map(segment => this.parse(segment));
|
|
398
|
+
}
|
|
399
|
+
/**
|
|
400
|
+
* Split a segment on commas only if a verb is detected after the comma.
|
|
401
|
+
* "take knife, drop lamp" → ["take knife", "drop lamp"] (verb after comma)
|
|
402
|
+
* "take knife, lamp" → ["take knife, lamp"] (no verb, treat as list)
|
|
403
|
+
*/
|
|
404
|
+
splitOnCommasIfChain(input) {
|
|
405
|
+
// Replace quoted strings with placeholders to protect them
|
|
406
|
+
const placeholders = new Map();
|
|
407
|
+
let processedInput = input;
|
|
408
|
+
let placeholderIndex = 0;
|
|
409
|
+
// Handle double quotes
|
|
410
|
+
processedInput = processedInput.replace(/"[^"]*"/g, (match) => {
|
|
411
|
+
const placeholder = `__COMMA_QUOTE_${placeholderIndex++}__`;
|
|
412
|
+
placeholders.set(placeholder, match);
|
|
413
|
+
return placeholder;
|
|
414
|
+
});
|
|
415
|
+
// Handle single quotes
|
|
416
|
+
processedInput = processedInput.replace(/'[^']*'/g, (match) => {
|
|
417
|
+
const placeholder = `__COMMA_QUOTE_${placeholderIndex++}__`;
|
|
418
|
+
placeholders.set(placeholder, match);
|
|
419
|
+
return placeholder;
|
|
420
|
+
});
|
|
421
|
+
// Check for commas
|
|
422
|
+
const commaIndex = processedInput.indexOf(',');
|
|
423
|
+
if (commaIndex === -1) {
|
|
424
|
+
return [input]; // No commas, return as-is
|
|
425
|
+
}
|
|
426
|
+
// Check if word after comma is a verb
|
|
427
|
+
const afterComma = processedInput.slice(commaIndex + 1).trim();
|
|
428
|
+
const firstWordMatch = afterComma.match(/^(\w+)/);
|
|
429
|
+
if (!firstWordMatch) {
|
|
430
|
+
return [input]; // No word after comma, return as-is
|
|
431
|
+
}
|
|
432
|
+
const firstWord = firstWordMatch[1].toLowerCase();
|
|
433
|
+
// Check if it's a known verb
|
|
434
|
+
const isVerb = if_domain_1.vocabularyRegistry.hasWord(firstWord, if_domain_1.PartOfSpeech.VERB);
|
|
435
|
+
if (isVerb) {
|
|
436
|
+
// It's a verb - split into separate commands
|
|
437
|
+
const parts = processedInput.split(',');
|
|
438
|
+
const segments = [];
|
|
439
|
+
for (const part of parts) {
|
|
440
|
+
let restored = part;
|
|
441
|
+
for (const [placeholder, original] of placeholders) {
|
|
442
|
+
restored = restored.replace(placeholder, original);
|
|
443
|
+
}
|
|
444
|
+
const trimmed = restored.trim();
|
|
445
|
+
if (trimmed.length > 0) {
|
|
446
|
+
segments.push(trimmed);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
return segments;
|
|
450
|
+
}
|
|
451
|
+
// Not a verb after comma - return as single segment (list)
|
|
452
|
+
return [input];
|
|
453
|
+
}
|
|
454
|
+
/**
|
|
455
|
+
* Split input on periods, preserving quoted strings.
|
|
456
|
+
* Handles edge cases like trailing periods and empty segments.
|
|
457
|
+
*/
|
|
458
|
+
splitOnPeriods(input) {
|
|
459
|
+
// Replace quoted strings with placeholders
|
|
460
|
+
const placeholders = new Map();
|
|
461
|
+
let processedInput = input;
|
|
462
|
+
let placeholderIndex = 0;
|
|
463
|
+
// Handle double quotes
|
|
464
|
+
processedInput = processedInput.replace(/"[^"]*"/g, (match) => {
|
|
465
|
+
const placeholder = `__PERIOD_QUOTE_${placeholderIndex++}__`;
|
|
466
|
+
placeholders.set(placeholder, match);
|
|
467
|
+
return placeholder;
|
|
468
|
+
});
|
|
469
|
+
// Handle single quotes
|
|
470
|
+
processedInput = processedInput.replace(/'[^']*'/g, (match) => {
|
|
471
|
+
const placeholder = `__PERIOD_QUOTE_${placeholderIndex++}__`;
|
|
472
|
+
placeholders.set(placeholder, match);
|
|
473
|
+
return placeholder;
|
|
474
|
+
});
|
|
475
|
+
// Split on periods
|
|
476
|
+
const rawSegments = processedInput.split('.');
|
|
477
|
+
// Restore placeholders and clean up segments
|
|
478
|
+
const segments = [];
|
|
479
|
+
for (const segment of rawSegments) {
|
|
480
|
+
let restored = segment;
|
|
481
|
+
for (const [placeholder, original] of placeholders) {
|
|
482
|
+
restored = restored.replace(placeholder, original);
|
|
483
|
+
}
|
|
484
|
+
const trimmed = restored.trim();
|
|
485
|
+
if (trimmed.length > 0) {
|
|
486
|
+
segments.push(trimmed);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
return segments;
|
|
490
|
+
}
|
|
491
|
+
/**
 * Tokenize input with rich information preservation
 *
 * Double-quoted strings are collapsed into single NOUN tokens; every other
 * word is matched against the vocabulary registry via getTokenCandidates().
 *
 * NOTE(review): only double quotes are masked here, while splitOnPeriods/
 * splitOnCommasIfChain also mask single quotes — confirm whether
 * single-quoted strings should also be tokenized as units.
 *
 * NOTE(review): `position` is accumulated over the placeholder-substituted
 * string, so token positions after a quoted string drift from offsets in the
 * raw input (placeholder length != quoted-text length) — confirm consumers
 * only rely on relative ordering, not raw-input offsets.
 *
 * @param input - raw input line
 * @returns tokens of shape { word, normalized, position, length,
 *   partOfSpeech[], candidates[] }
 */
tokenizeRich(input) {
    const tokens = [];
    let position = 0;
    // First extract quoted strings and replace them with placeholders
    const quotedStrings = [];
    let processedInput = input;
    // Handle double quotes
    const doubleQuoteRegex = /"([^"]*)"/g;
    let match;
    let quoteIndex = 0;
    while ((match = doubleQuoteRegex.exec(input)) !== null) {
        const placeholder = `__QUOTE_${quoteIndex}__`;
        const content = match[1];
        const quotePosition = match.index;
        quotedStrings.push({ placeholder, content, position: quotePosition });
        // replace() substitutes the first occurrence only; matches are
        // processed left-to-right, so each quote is replaced in order.
        processedInput = processedInput.replace(match[0], placeholder);
        quoteIndex++;
    }
    // Now tokenize the processed input
    const words = processedInput.trim().split(/(\s+)/); // Keep whitespace for position tracking
    for (const segment of words) {
        // Skip pure whitespace segments
        if (/^\s+$/.test(segment)) {
            position += segment.length;
            continue;
        }
        // Check if this is a quoted string placeholder
        const quotedString = quotedStrings.find(qs => qs.placeholder === segment);
        if (quotedString) {
            // Create a token for the quoted string
            tokens.push({
                word: `"${quotedString.content}"`,
                normalized: quotedString.content.toLowerCase(),
                position,
                length: quotedString.content.length + 2, // Include quotes
                partOfSpeech: [world_model_1.PartOfSpeech.NOUN], // Treat quoted strings as nouns
                candidates: [{
                        id: 'quoted_string',
                        type: 'noun',
                        confidence: 1.0
                    }]
            });
            position += segment.length;
            continue;
        }
        const normalized = segment.toLowerCase();
        const vocabCandidates = this.getTokenCandidates(normalized);
        // Convert to rich token candidates
        const candidates = vocabCandidates.map(vc => ({
            id: vc.mapsTo,
            type: vc.partOfSpeech,
            confidence: vc.priority || 1.0
        }));
        // Determine parts of speech (deduplicated via Set)
        const partsOfSpeech = Array.from(new Set(vocabCandidates.map(vc => mapPartOfSpeech(vc.partOfSpeech))));
        // If no vocabulary matches, mark as unknown
        if (partsOfSpeech.length === 0) {
            partsOfSpeech.push(world_model_1.PartOfSpeech.UNKNOWN);
        }
        tokens.push({
            word: segment,
            normalized,
            position,
            length: segment.length,
            partOfSpeech: partsOfSpeech,
            candidates
        });
        position += segment.length;
    }
    return tokens;
}
|
|
565
|
+
/**
|
|
566
|
+
* Find possible command structures in the tokens
|
|
567
|
+
*/
|
|
568
|
+
findCommandStructures(tokens, input) {
|
|
569
|
+
// Create grammar context with world model if available
|
|
570
|
+
const context = {
|
|
571
|
+
world: this.worldContext?.world || null,
|
|
572
|
+
actorId: this.worldContext?.actorId || 'player',
|
|
573
|
+
currentLocation: this.worldContext?.currentLocation || 'current',
|
|
574
|
+
slots: new Map()
|
|
575
|
+
};
|
|
576
|
+
// Convert tokens to internal format for grammar engine
|
|
577
|
+
const internalTokens = tokens.map(t => ({
|
|
578
|
+
word: t.word,
|
|
579
|
+
normalized: t.normalized,
|
|
580
|
+
position: t.position,
|
|
581
|
+
candidates: t.candidates.map((c) => ({
|
|
582
|
+
partOfSpeech: c.type,
|
|
583
|
+
mapsTo: c.id,
|
|
584
|
+
priority: c.confidence || 0
|
|
585
|
+
}))
|
|
586
|
+
}));
|
|
587
|
+
// Use grammar engine to find matches
|
|
588
|
+
const matches = this.grammarEngine.findMatches(internalTokens, context);
|
|
589
|
+
// Convert grammar matches to RichCandidates
|
|
590
|
+
const candidates = [];
|
|
591
|
+
for (const match of matches) {
|
|
592
|
+
const candidate = this.convertGrammarMatch(match, tokens);
|
|
593
|
+
if (candidate) {
|
|
594
|
+
candidates.push(candidate);
|
|
595
|
+
}
|
|
596
|
+
else if (input.includes('throw')) {
|
|
597
|
+
console.log('Failed to convert match:', match.rule.pattern);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
return candidates;
|
|
601
|
+
}
|
|
602
|
+
/**
|
|
603
|
+
* Convert a grammar match to a RichCandidate
|
|
604
|
+
*/
|
|
605
|
+
convertGrammarMatch(match, tokens) {
|
|
606
|
+
const rule = match.rule;
|
|
607
|
+
// Extract verb tokens from the beginning of the match
|
|
608
|
+
const verbTokenIndices = [];
|
|
609
|
+
let verbEndIndex = 0;
|
|
610
|
+
// Find verb tokens at the start
|
|
611
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
612
|
+
if (tokens[i].partOfSpeech.includes(world_model_1.PartOfSpeech.VERB)) {
|
|
613
|
+
verbTokenIndices.push(i);
|
|
614
|
+
verbEndIndex = i + 1;
|
|
615
|
+
}
|
|
616
|
+
else {
|
|
617
|
+
break;
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
// Build verb phrase
|
|
621
|
+
const verbPhrase = {
|
|
622
|
+
tokens: verbTokenIndices,
|
|
623
|
+
text: verbTokenIndices.map(i => tokens[i].word).join(' '),
|
|
624
|
+
head: verbTokenIndices.length > 0 ? tokens[verbTokenIndices[0]].normalized : ''
|
|
625
|
+
};
|
|
626
|
+
// For complex patterns with multiple slots and prepositions,
|
|
627
|
+
// we need to analyze the pattern structure
|
|
628
|
+
const slotEntries = Array.from(match.slots.entries());
|
|
629
|
+
// Sort slots by their token positions
|
|
630
|
+
slotEntries.sort((a, b) => (a[1].tokens[0] || 0) - (b[1].tokens[0] || 0));
|
|
631
|
+
// Extract structure based on pattern and position
|
|
632
|
+
let directObject;
|
|
633
|
+
let preposition;
|
|
634
|
+
let indirectObject;
|
|
635
|
+
let extras = {};
|
|
636
|
+
// Analyze the pattern to understand the expected structure
|
|
637
|
+
const patternParts = rule.pattern.split(/\s+/);
|
|
638
|
+
const slotPositions = {};
|
|
639
|
+
let positionCounter = 0;
|
|
640
|
+
// Map slot names to their positions in the pattern
|
|
641
|
+
for (const part of patternParts) {
|
|
642
|
+
if (part.startsWith(':')) {
|
|
643
|
+
const slotName = part.substring(1);
|
|
644
|
+
slotPositions[slotName] = positionCounter++;
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
// ADR-080: Track text slots, instruments, and excluded items
|
|
648
|
+
let textSlots;
|
|
649
|
+
let instrument;
|
|
650
|
+
let excluded;
|
|
651
|
+
// ADR-082: Track vocabulary slots and manner
|
|
652
|
+
let vocabularySlots;
|
|
653
|
+
let manner;
|
|
654
|
+
// Process slots based on the pattern structure
|
|
655
|
+
for (const [slotName, slotData] of slotEntries) {
|
|
656
|
+
const slotTokens = slotData.tokens.map((idx) => tokens[idx]);
|
|
657
|
+
// Check slot type from the match data (set by grammar engine)
|
|
658
|
+
const slotType = slotData.slotType;
|
|
659
|
+
// Handle text slots (TEXT or TEXT_GREEDY)
|
|
660
|
+
if (slotType === if_domain_1.SlotType.TEXT || slotType === if_domain_1.SlotType.TEXT_GREEDY) {
|
|
661
|
+
if (!textSlots) {
|
|
662
|
+
textSlots = new Map();
|
|
663
|
+
}
|
|
664
|
+
textSlots.set(slotName, slotData.text);
|
|
665
|
+
continue; // Don't also add to direct/indirect objects
|
|
666
|
+
}
|
|
667
|
+
// ADR-082: Handle vocabulary slots
|
|
668
|
+
if (slotType === if_domain_1.SlotType.VOCABULARY) {
|
|
669
|
+
const slotDataAny = slotData;
|
|
670
|
+
if (!vocabularySlots) {
|
|
671
|
+
vocabularySlots = new Map();
|
|
672
|
+
}
|
|
673
|
+
vocabularySlots.set(slotName, {
|
|
674
|
+
word: slotDataAny.matchedWord || slotData.text.toLowerCase(),
|
|
675
|
+
category: slotDataAny.category || ''
|
|
676
|
+
});
|
|
677
|
+
continue; // Don't also add to direct/indirect objects
|
|
678
|
+
}
|
|
679
|
+
// ADR-082: Handle manner slots
|
|
680
|
+
if (slotType === if_domain_1.SlotType.MANNER) {
|
|
681
|
+
const slotDataAny = slotData;
|
|
682
|
+
manner = slotDataAny.manner || slotData.text.toLowerCase();
|
|
683
|
+
continue; // Don't also add to direct/indirect objects
|
|
684
|
+
}
|
|
685
|
+
// ADR-084: Handle direction slots - put in extras.direction
|
|
686
|
+
if (slotType === if_domain_1.SlotType.DIRECTION) {
|
|
687
|
+
const directionText = slotData.text.toLowerCase();
|
|
688
|
+
const directionConstant = (0, direction_mappings_1.parseDirection)(directionText);
|
|
689
|
+
extras.direction = directionConstant || directionText;
|
|
690
|
+
continue; // Don't also add to direct/indirect objects
|
|
691
|
+
}
|
|
692
|
+
// Build base noun phrase
|
|
693
|
+
const phrase = {
|
|
694
|
+
tokens: slotData.tokens,
|
|
695
|
+
text: slotData.text,
|
|
696
|
+
head: slotTokens[slotTokens.length - 1]?.normalized || slotData.text,
|
|
697
|
+
modifiers: [],
|
|
698
|
+
articles: [],
|
|
699
|
+
determiners: [],
|
|
700
|
+
candidates: [slotData.text]
|
|
701
|
+
};
|
|
702
|
+
// ADR-080 Phase 2: Add multi-object support
|
|
703
|
+
const slotDataAny = slotData;
|
|
704
|
+
if (slotDataAny.isAll) {
|
|
705
|
+
phrase.isAll = true;
|
|
706
|
+
// Extract excluded items for "all but X" patterns
|
|
707
|
+
if (slotDataAny.excluded && slotDataAny.excluded.length > 0) {
|
|
708
|
+
excluded = slotDataAny.excluded.map((item) => ({
|
|
709
|
+
tokens: item.tokens,
|
|
710
|
+
text: item.text,
|
|
711
|
+
head: item.text.split(' ').pop() || item.text,
|
|
712
|
+
modifiers: [],
|
|
713
|
+
articles: [],
|
|
714
|
+
determiners: [],
|
|
715
|
+
candidates: [item.text]
|
|
716
|
+
}));
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
if (slotDataAny.isList && slotDataAny.items) {
|
|
720
|
+
phrase.isList = true;
|
|
721
|
+
phrase.items = slotDataAny.items.map((item) => ({
|
|
722
|
+
tokens: item.tokens,
|
|
723
|
+
text: item.text,
|
|
724
|
+
head: item.text.split(' ').pop() || item.text,
|
|
725
|
+
modifiers: [],
|
|
726
|
+
articles: [],
|
|
727
|
+
determiners: [],
|
|
728
|
+
candidates: [item.text],
|
|
729
|
+
entityId: item.entityId // ADR-089: preserve pre-resolved entity ID
|
|
730
|
+
}));
|
|
731
|
+
}
|
|
732
|
+
// ADR-089: Copy pre-resolved entity ID from pronoun resolution
|
|
733
|
+
if (slotDataAny.entityId) {
|
|
734
|
+
phrase.entityId = slotDataAny.entityId;
|
|
735
|
+
}
|
|
736
|
+
// ADR-104: Mark if this was a pronoun (for implicit inference)
|
|
737
|
+
if (slotDataAny.isPronoun) {
|
|
738
|
+
phrase.wasPronoun = true;
|
|
739
|
+
}
|
|
740
|
+
// Handle instrument slots
|
|
741
|
+
if (slotType === if_domain_1.SlotType.INSTRUMENT) {
|
|
742
|
+
instrument = phrase;
|
|
743
|
+
continue; // Don't also add to direct/indirect objects
|
|
744
|
+
}
|
|
745
|
+
// Determine where this slot should go based on the pattern
|
|
746
|
+
if (rule.pattern.includes(' with :' + slotName)) {
|
|
747
|
+
// This slot comes after 'with', put it in extras
|
|
748
|
+
extras[slotName] = phrase;
|
|
749
|
+
}
|
|
750
|
+
else if (rule.pattern.includes('give :recipient :item')) {
|
|
751
|
+
// Special case for give patterns
|
|
752
|
+
if (slotName === 'item') {
|
|
753
|
+
directObject = phrase;
|
|
754
|
+
}
|
|
755
|
+
else if (slotName === 'recipient') {
|
|
756
|
+
indirectObject = phrase;
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
else if (rule.pattern.includes('give :item to :recipient')) {
|
|
760
|
+
// Give with 'to' pattern
|
|
761
|
+
if (slotName === 'item') {
|
|
762
|
+
directObject = phrase;
|
|
763
|
+
}
|
|
764
|
+
else if (slotName === 'recipient') {
|
|
765
|
+
indirectObject = phrase;
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
else if (rule.pattern.includes('show :recipient :item')) {
|
|
769
|
+
// Special case for show recipient item
|
|
770
|
+
if (slotName === 'item') {
|
|
771
|
+
directObject = phrase;
|
|
772
|
+
}
|
|
773
|
+
else if (slotName === 'recipient') {
|
|
774
|
+
indirectObject = phrase;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
else if (rule.pattern.includes('show :item to :recipient')) {
|
|
778
|
+
// Show with 'to' pattern
|
|
779
|
+
if (slotName === 'item') {
|
|
780
|
+
directObject = phrase;
|
|
781
|
+
}
|
|
782
|
+
else if (slotName === 'recipient') {
|
|
783
|
+
indirectObject = phrase;
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
else if (rule.pattern.includes(':item from :container')) {
|
|
787
|
+
// Take from pattern
|
|
788
|
+
if (slotName === 'item') {
|
|
789
|
+
directObject = phrase;
|
|
790
|
+
}
|
|
791
|
+
else if (slotName === 'container') {
|
|
792
|
+
indirectObject = phrase;
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
else {
|
|
796
|
+
// General case: first slot is direct object, second is indirect object
|
|
797
|
+
const slotPosition = slotPositions[slotName];
|
|
798
|
+
if (slotPosition === 0 && !directObject) {
|
|
799
|
+
directObject = phrase;
|
|
800
|
+
}
|
|
801
|
+
else if (slotPosition === 1 && !indirectObject) {
|
|
802
|
+
indirectObject = phrase;
|
|
803
|
+
}
|
|
804
|
+
else {
|
|
805
|
+
// Additional slots or already assigned slots go to extras
|
|
806
|
+
if (!extras[slotName]) {
|
|
807
|
+
extras[slotName] = phrase;
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
// Find prepositions between direct and indirect objects
|
|
813
|
+
if (directObject && indirectObject) {
|
|
814
|
+
const directObjLastToken = directObject.tokens[directObject.tokens.length - 1];
|
|
815
|
+
const indirectObjFirstToken = indirectObject.tokens[0];
|
|
816
|
+
for (let i = directObjLastToken + 1; i < indirectObjFirstToken; i++) {
|
|
817
|
+
if (tokens[i].partOfSpeech.includes(world_model_1.PartOfSpeech.PREPOSITION)) {
|
|
818
|
+
preposition = {
|
|
819
|
+
tokens: [i],
|
|
820
|
+
text: tokens[i].word
|
|
821
|
+
};
|
|
822
|
+
break;
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
// For direction commands, handle specially
|
|
827
|
+
if (rule.action === 'if.action.going' && !directObject) {
|
|
828
|
+
// Extract direction from pattern
|
|
829
|
+
const directionToken = tokens.find(t => t.candidates.some((c) => c.type === if_domain_1.PartOfSpeech.DIRECTION) ||
|
|
830
|
+
['north', 'south', 'east', 'west', 'up', 'down', 'in', 'out', 'n', 's', 'e', 'w', 'u', 'd'].includes(t.normalized));
|
|
831
|
+
if (directionToken) {
|
|
832
|
+
return {
|
|
833
|
+
tokens,
|
|
834
|
+
verb: verbPhrase,
|
|
835
|
+
pattern: 'DIRECTION_ONLY',
|
|
836
|
+
confidence: match.confidence,
|
|
837
|
+
action: rule.action,
|
|
838
|
+
direction: directionToken.normalized
|
|
839
|
+
};
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
// Determine pattern type
|
|
843
|
+
let pattern = 'VERB_ONLY';
|
|
844
|
+
if (directObject && indirectObject) {
|
|
845
|
+
if (preposition) {
|
|
846
|
+
pattern = 'VERB_NOUN_PREP_NOUN';
|
|
847
|
+
}
|
|
848
|
+
else {
|
|
849
|
+
pattern = 'VERB_NOUN_NOUN';
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
else if (directObject) {
|
|
853
|
+
pattern = 'VERB_NOUN';
|
|
854
|
+
}
|
|
855
|
+
const candidate = {
|
|
856
|
+
tokens,
|
|
857
|
+
verb: verbPhrase,
|
|
858
|
+
directObject,
|
|
859
|
+
preposition,
|
|
860
|
+
indirectObject,
|
|
861
|
+
pattern,
|
|
862
|
+
confidence: match.confidence,
|
|
863
|
+
action: rule.action,
|
|
864
|
+
// ADR-080 additions
|
|
865
|
+
textSlots,
|
|
866
|
+
instrument,
|
|
867
|
+
excluded,
|
|
868
|
+
// ADR-082 additions
|
|
869
|
+
vocabularySlots,
|
|
870
|
+
manner
|
|
871
|
+
};
|
|
872
|
+
// Add extras if present
|
|
873
|
+
if (Object.keys(extras).length > 0) {
|
|
874
|
+
candidate.extras = extras;
|
|
875
|
+
}
|
|
876
|
+
return candidate;
|
|
877
|
+
}
|
|
878
|
+
/**
|
|
879
|
+
* Register story-specific grammar rules
|
|
880
|
+
* @deprecated Use getStoryGrammar() for full API
|
|
881
|
+
*/
|
|
882
|
+
registerGrammar(pattern, action, constraints) {
|
|
883
|
+
const builder = this.grammarEngine.createBuilder().define(pattern)
|
|
884
|
+
.mapsTo(action);
|
|
885
|
+
// Apply constraints if provided
|
|
886
|
+
if (constraints) {
|
|
887
|
+
for (const [slot, constraint] of Object.entries(constraints)) {
|
|
888
|
+
builder.where(slot, constraint);
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
builder.build();
|
|
892
|
+
}
|
|
893
|
+
/**
|
|
894
|
+
* Get the grammar builder for story-specific rules.
|
|
895
|
+
* Stories use this to define custom grammar patterns.
|
|
896
|
+
*
|
|
897
|
+
* ADR-084: Returns the grammar builder directly instead of a wrapper,
|
|
898
|
+
* giving stories full access to all PatternBuilder methods.
|
|
899
|
+
*/
|
|
900
|
+
getStoryGrammar() {
|
|
901
|
+
return this.grammarEngine.createBuilder();
|
|
902
|
+
}
|
|
903
|
+
/**
|
|
904
|
+
* Get candidate interpretations for a token
|
|
905
|
+
*/
|
|
906
|
+
getTokenCandidates(word) {
|
|
907
|
+
const entries = if_domain_1.vocabularyRegistry.lookup(word);
|
|
908
|
+
return entries.map((entry) => ({
|
|
909
|
+
partOfSpeech: entry.partOfSpeech,
|
|
910
|
+
mapsTo: entry.mapsTo,
|
|
911
|
+
priority: entry.priority || 0,
|
|
912
|
+
source: entry.source,
|
|
913
|
+
metadata: entry.metadata
|
|
914
|
+
}));
|
|
915
|
+
}
|
|
916
|
+
/**
|
|
917
|
+
* Old tokenize method for compatibility
|
|
918
|
+
*/
|
|
919
|
+
tokenize(input) {
|
|
920
|
+
const richTokens = this.tokenizeRich(input);
|
|
921
|
+
// Convert to old format
|
|
922
|
+
return richTokens.map(rt => ({
|
|
923
|
+
word: rt.word,
|
|
924
|
+
normalized: rt.normalized,
|
|
925
|
+
position: rt.position,
|
|
926
|
+
candidates: this.getTokenCandidates(rt.normalized)
|
|
927
|
+
}));
|
|
928
|
+
}
|
|
929
|
+
    /**
     * Parse with errors for testing compatibility.
     *
     * Tokenizes the input, collects UNKNOWN_WORD errors for tokens that are
     * tagged UNKNOWN and have no vocabulary candidates, then (unless unknown
     * words exist and allowPartial is off) runs the normal parse and converts
     * its result to the legacy { candidates, errors, partial } shape.
     *
     * @param input The raw command string to parse.
     * @param options Per-call options merged over this.options; the options
     *   read here are allowPartial and minConfidence.
     * @returns { candidates, errors, partial } — candidates in the old
     *   test-compatible format; partial is true when parsing proceeded
     *   despite unknown words.
     */
    parseWithErrors(input, options) {
        // Merge options with current parser options
        const mergedOptions = { ...this.options, ...options };
        // First, tokenize to check for unknown words
        const tokens = this.tokenizeRich(input);
        const errors = [];
        // Check for unknown words
        for (const token of tokens) {
            // A token only counts as unknown when it is tagged UNKNOWN *and*
            // has no vocabulary candidates at all.
            if (token.partOfSpeech.includes(world_model_1.PartOfSpeech.UNKNOWN) &&
                token.candidates.length === 0) {
                errors.push({
                    type: if_domain_1.ParseErrorType.UNKNOWN_WORD,
                    message: `Unknown word: ${token.word}`,
                    words: [token.word],
                    position: token.position
                });
            }
        }
        // If we have unknown words and allowPartial is true, we should still try to parse
        const hasUnknownWords = errors.length > 0;
        const shouldContinue = !hasUnknownWords || mergedOptions.allowPartial;
        if (!shouldContinue) {
            // Unknown words present and partial parsing disallowed: report only.
            return {
                candidates: [],
                errors,
                partial: false
            };
        }
        // Try to parse
        const result = this.parse(input);
        if (result.success) {
            // Convert to old candidate format for test compatibility
            const candidate = {
                action: result.value.action,
                originalInput: input,
                tokens: this.tokenize(input),
                pattern: result.value.pattern,
                confidence: result.value.confidence
            };
            // Add noun information
            if (result.value.structure.directObject) {
                candidate.nounText = result.value.structure.directObject.text;
                candidate.nounCandidates = result.value.structure.directObject.candidates;
            }
            // Add preposition
            if (result.value.structure.preposition) {
                candidate.preposition = result.value.structure.preposition.text;
            }
            // Add indirect object
            if (result.value.structure.indirectObject) {
                candidate.secondNounText = result.value.structure.indirectObject.text;
                candidate.secondNounCandidates = result.value.structure.indirectObject.candidates;
            }
            // Filter by confidence if specified
            const candidateConfidence = candidate.confidence || 0;
            const candidates = mergedOptions.minConfidence !== undefined &&
                candidateConfidence < mergedOptions.minConfidence
                ? []
                : [candidate];
            return {
                candidates,
                errors,
                partial: hasUnknownWords && (mergedOptions.allowPartial === true)
            };
        }
        else {
            // If we already have errors from unknown words, return those
            if (errors.length > 0) {
                return {
                    candidates: [],
                    errors,
                    partial: mergedOptions.allowPartial === true
                };
            }
            // Other parsing errors
            const error = {
                type: if_domain_1.ParseErrorType.PATTERN_MISMATCH,
                message: result.error.message,
                position: result.error.position
            };
            return {
                candidates: [],
                errors: [error],
                partial: false
            };
        }
    }
|
|
1019
|
+
/**
|
|
1020
|
+
* Add a custom verb to the parser vocabulary
|
|
1021
|
+
* @param actionId The action ID this verb maps to
|
|
1022
|
+
* @param verbs Array of verb forms to recognize
|
|
1023
|
+
* @param pattern Optional grammar pattern for the verb (e.g., 'VERB_OBJ')
|
|
1024
|
+
* @param prepositions Optional prepositions this verb can use
|
|
1025
|
+
*/
|
|
1026
|
+
addVerb(actionId, verbs, pattern, prepositions) {
|
|
1027
|
+
const verbDef = {
|
|
1028
|
+
actionId,
|
|
1029
|
+
verbs,
|
|
1030
|
+
pattern: pattern || 'VERB_ONLY',
|
|
1031
|
+
prepositions
|
|
1032
|
+
};
|
|
1033
|
+
if_domain_1.vocabularyRegistry.registerDynamicVerbs([verbDef], 'story');
|
|
1034
|
+
// Also register grammar patterns for the verb
|
|
1035
|
+
const grammarBuilder = this.grammarEngine.createBuilder();
|
|
1036
|
+
for (const verb of verbs) {
|
|
1037
|
+
// Register patterns based on the pattern type
|
|
1038
|
+
if (pattern === 'VERB_OBJ' || pattern === 'VERB_NOUN') {
|
|
1039
|
+
// Register verb + object pattern
|
|
1040
|
+
grammarBuilder.define(`${verb} :object`)
|
|
1041
|
+
.mapsTo(actionId)
|
|
1042
|
+
.withPriority(150)
|
|
1043
|
+
.build();
|
|
1044
|
+
}
|
|
1045
|
+
else if (pattern === 'VERB_PREP_NOUN' && prepositions) {
|
|
1046
|
+
// Register verb + preposition + object patterns
|
|
1047
|
+
for (const prep of prepositions) {
|
|
1048
|
+
grammarBuilder.define(`${verb} ${prep} :object`)
|
|
1049
|
+
.mapsTo(actionId)
|
|
1050
|
+
.withPriority(150)
|
|
1051
|
+
.build();
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
1054
|
+
else if (pattern === 'VERB_NOUN_PREP_NOUN' && prepositions) {
|
|
1055
|
+
// Register verb + object + preposition + object patterns
|
|
1056
|
+
for (const prep of prepositions) {
|
|
1057
|
+
grammarBuilder.define(`${verb} :object1 ${prep} :object2`)
|
|
1058
|
+
.mapsTo(actionId)
|
|
1059
|
+
.withPriority(150)
|
|
1060
|
+
.build();
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
else {
|
|
1064
|
+
// Default: verb only pattern
|
|
1065
|
+
grammarBuilder.define(verb)
|
|
1066
|
+
.mapsTo(actionId)
|
|
1067
|
+
.withPriority(150)
|
|
1068
|
+
.build();
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
// Emit debug event if configured
|
|
1072
|
+
if (this.onDebugEvent) {
|
|
1073
|
+
this.onDebugEvent({
|
|
1074
|
+
id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
1075
|
+
timestamp: Date.now(),
|
|
1076
|
+
subsystem: 'parser',
|
|
1077
|
+
type: 'vocabulary.add_verb',
|
|
1078
|
+
data: {
|
|
1079
|
+
actionId,
|
|
1080
|
+
verbs,
|
|
1081
|
+
pattern,
|
|
1082
|
+
prepositions
|
|
1083
|
+
}
|
|
1084
|
+
});
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
/**
|
|
1088
|
+
* Add a custom noun/synonym to the parser vocabulary
|
|
1089
|
+
* @param word The word to add
|
|
1090
|
+
* @param canonical The canonical form it maps to
|
|
1091
|
+
*/
|
|
1092
|
+
addNoun(word, canonical) {
|
|
1093
|
+
// For now, nouns are handled through the world model's entity names
|
|
1094
|
+
// This method is provided for future extension when we add a noun registry
|
|
1095
|
+
// Emit debug event if configured
|
|
1096
|
+
if (this.onDebugEvent) {
|
|
1097
|
+
this.onDebugEvent({
|
|
1098
|
+
id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
1099
|
+
timestamp: Date.now(),
|
|
1100
|
+
subsystem: 'parser',
|
|
1101
|
+
type: 'vocabulary.add_noun',
|
|
1102
|
+
data: {
|
|
1103
|
+
word,
|
|
1104
|
+
canonical
|
|
1105
|
+
}
|
|
1106
|
+
});
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
/**
|
|
1110
|
+
* Add a custom adjective to the parser vocabulary
|
|
1111
|
+
* @param word The adjective to add
|
|
1112
|
+
*/
|
|
1113
|
+
addAdjective(word) {
|
|
1114
|
+
// Adjectives are currently recognized but not stored in a registry
|
|
1115
|
+
// This method is provided for future extension
|
|
1116
|
+
// Emit debug event if configured
|
|
1117
|
+
if (this.onDebugEvent) {
|
|
1118
|
+
this.onDebugEvent({
|
|
1119
|
+
id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
1120
|
+
timestamp: Date.now(),
|
|
1121
|
+
subsystem: 'parser',
|
|
1122
|
+
type: 'vocabulary.add_adjective',
|
|
1123
|
+
data: {
|
|
1124
|
+
word
|
|
1125
|
+
}
|
|
1126
|
+
});
|
|
1127
|
+
}
|
|
1128
|
+
}
|
|
1129
|
+
/**
|
|
1130
|
+
* Add a custom preposition to the parser vocabulary
|
|
1131
|
+
* @param word The preposition to add
|
|
1132
|
+
*/
|
|
1133
|
+
addPreposition(word) {
|
|
1134
|
+
if_domain_1.vocabularyRegistry.registerPrepositions([word]);
|
|
1135
|
+
// Also register common grammar patterns that use this preposition
|
|
1136
|
+
// This allows actions like "put X <preposition> Y" to work
|
|
1137
|
+
const grammarBuilder = this.grammarEngine.createBuilder();
|
|
1138
|
+
grammarBuilder.define(`put :object ${word} :location`)
|
|
1139
|
+
.mapsTo('if.action.putting')
|
|
1140
|
+
.withPriority(150)
|
|
1141
|
+
.build();
|
|
1142
|
+
grammarBuilder.define(`place :object ${word} :location`)
|
|
1143
|
+
.mapsTo('if.action.putting')
|
|
1144
|
+
.withPriority(150)
|
|
1145
|
+
.build();
|
|
1146
|
+
// Emit debug event if configured
|
|
1147
|
+
if (this.onDebugEvent) {
|
|
1148
|
+
this.onDebugEvent({
|
|
1149
|
+
id: `parser_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
|
|
1150
|
+
timestamp: Date.now(),
|
|
1151
|
+
subsystem: 'parser',
|
|
1152
|
+
type: 'vocabulary.add_preposition',
|
|
1153
|
+
data: {
|
|
1154
|
+
word
|
|
1155
|
+
}
|
|
1156
|
+
});
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
/**
|
|
1160
|
+
* Build a detailed parse error from failure analysis
|
|
1161
|
+
*/
|
|
1162
|
+
buildParseError(input, analysis) {
|
|
1163
|
+
const { code, messageId, context } = analysis;
|
|
1164
|
+
// Generate fallback message based on error code and context
|
|
1165
|
+
let message;
|
|
1166
|
+
switch (code) {
|
|
1167
|
+
case 'NO_VERB':
|
|
1168
|
+
message = "I beg your pardon?";
|
|
1169
|
+
break;
|
|
1170
|
+
case 'UNKNOWN_VERB':
|
|
1171
|
+
message = context.verb
|
|
1172
|
+
? `I don't know the verb "${context.verb}".`
|
|
1173
|
+
: "I don't understand that sentence.";
|
|
1174
|
+
break;
|
|
1175
|
+
case 'MISSING_OBJECT':
|
|
1176
|
+
message = context.verb
|
|
1177
|
+
? `What do you want to ${context.verb}?`
|
|
1178
|
+
: "What do you want to do that to?";
|
|
1179
|
+
break;
|
|
1180
|
+
case 'MISSING_INDIRECT':
|
|
1181
|
+
message = context.verb
|
|
1182
|
+
? `${capitalize(context.verb)} it where?`
|
|
1183
|
+
: "Where do you want to do that?";
|
|
1184
|
+
break;
|
|
1185
|
+
case 'ENTITY_NOT_FOUND':
|
|
1186
|
+
message = context.noun
|
|
1187
|
+
? `I don't see any "${context.noun}" here.`
|
|
1188
|
+
: "I don't see that here.";
|
|
1189
|
+
break;
|
|
1190
|
+
case 'SCOPE_VIOLATION':
|
|
1191
|
+
message = context.noun
|
|
1192
|
+
? `You can't reach the ${context.noun}.`
|
|
1193
|
+
: "You can't reach that.";
|
|
1194
|
+
break;
|
|
1195
|
+
case 'AMBIGUOUS_INPUT':
|
|
1196
|
+
message = context.noun
|
|
1197
|
+
? `Which ${context.noun} do you mean?`
|
|
1198
|
+
: "Which do you mean?";
|
|
1199
|
+
break;
|
|
1200
|
+
default:
|
|
1201
|
+
message = "I don't understand that.";
|
|
1202
|
+
}
|
|
1203
|
+
return {
|
|
1204
|
+
type: 'PARSE_ERROR',
|
|
1205
|
+
code,
|
|
1206
|
+
messageId,
|
|
1207
|
+
message,
|
|
1208
|
+
input,
|
|
1209
|
+
verb: context.verb,
|
|
1210
|
+
failedWord: context.noun || context.verb,
|
|
1211
|
+
slot: context.slot
|
|
1212
|
+
};
|
|
1213
|
+
}
|
|
1214
|
+
// ============================================
|
|
1215
|
+
// Pronoun Context Methods (ADR-089 Phase B)
|
|
1216
|
+
// ============================================
|
|
1217
|
+
/**
|
|
1218
|
+
* Update pronoun context after a successful command execution
|
|
1219
|
+
* Called by the engine after command execution succeeds
|
|
1220
|
+
* @param command The validated command with resolved entity IDs
|
|
1221
|
+
* @param turnNumber Current turn number
|
|
1222
|
+
*/
|
|
1223
|
+
updatePronounContext(command, turnNumber) {
|
|
1224
|
+
if (this.worldContext?.world) {
|
|
1225
|
+
this.pronounContext.updateFromCommand(command, this.worldContext.world, turnNumber);
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
/**
|
|
1229
|
+
* Register an entity that was mentioned in context
|
|
1230
|
+
* Used when entities are referenced outside of standard parsing
|
|
1231
|
+
* @param entityId The entity's ID
|
|
1232
|
+
* @param text How the player referred to it
|
|
1233
|
+
* @param turnNumber Current turn number
|
|
1234
|
+
*/
|
|
1235
|
+
registerPronounEntity(entityId, text, turnNumber) {
|
|
1236
|
+
if (this.worldContext?.world) {
|
|
1237
|
+
this.pronounContext.registerEntity(entityId, text, this.worldContext.world, turnNumber);
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
/**
|
|
1241
|
+
* Reset the pronoun context
|
|
1242
|
+
* Called on game restart or when context should be cleared
|
|
1243
|
+
*/
|
|
1244
|
+
resetPronounContext() {
|
|
1245
|
+
this.pronounContext.reset();
|
|
1246
|
+
}
|
|
1247
|
+
/**
|
|
1248
|
+
* Get the last successfully parsed command
|
|
1249
|
+
* Used for "again" / "g" command support
|
|
1250
|
+
*/
|
|
1251
|
+
getLastCommand() {
|
|
1252
|
+
return this.pronounContext.getLastCommand();
|
|
1253
|
+
}
|
|
1254
|
+
/**
|
|
1255
|
+
* Get the pronoun context manager (for testing/debugging)
|
|
1256
|
+
*/
|
|
1257
|
+
getPronounContextManager() {
|
|
1258
|
+
return this.pronounContext;
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
exports.EnglishParser = EnglishParser;
|
|
1262
|
+
/**
 * Capitalize the first letter of a string.
 * Returns the input unchanged when it is empty.
 * @param {string} s - The string to capitalize.
 * @returns {string} The string with its first character upper-cased.
 */
function capitalize(s) {
    if (s.length === 0) {
        return s;
    }
    return s[0].toUpperCase() + s.substring(1);
}
|
|
1268
|
+
//# sourceMappingURL=english-parser.js.map
|