grammar-well 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.cjs +14 -0
- package/README.md +288 -0
- package/bootstrap.ts +35 -0
- package/build/compiler/compiler.d.ts +48 -0
- package/build/compiler/compiler.js +227 -0
- package/build/compiler/compiler.js.map +1 -0
- package/build/compiler/generator.d.ts +23 -0
- package/build/compiler/generator.js +213 -0
- package/build/compiler/generator.js.map +1 -0
- package/build/compiler/import-resolver.d.ts +15 -0
- package/build/compiler/import-resolver.js +37 -0
- package/build/compiler/import-resolver.js.map +1 -0
- package/build/compiler/outputs/javascript.d.ts +3 -0
- package/build/compiler/outputs/javascript.js +29 -0
- package/build/compiler/outputs/javascript.js.map +1 -0
- package/build/compiler/outputs/json.d.ts +2 -0
- package/build/compiler/outputs/json.js +8 -0
- package/build/compiler/outputs/json.js.map +1 -0
- package/build/compiler/outputs/typescript.d.ts +2 -0
- package/build/compiler/outputs/typescript.js +108 -0
- package/build/compiler/outputs/typescript.js.map +1 -0
- package/build/grammars/gwell.d.ts +997 -0
- package/build/grammars/gwell.js +537 -0
- package/build/grammars/gwell.js.map +1 -0
- package/build/grammars/json.d.ts +151 -0
- package/build/grammars/json.js +112 -0
- package/build/grammars/json.js.map +1 -0
- package/build/grammars/number.d.ts +239 -0
- package/build/grammars/number.js +115 -0
- package/build/grammars/number.js.map +1 -0
- package/build/grammars/number.json +1 -0
- package/build/grammars/string.d.ts +116 -0
- package/build/grammars/string.js +50 -0
- package/build/grammars/string.js.map +1 -0
- package/build/grammars/string.json +1 -0
- package/build/grammars/whitespace.d.ts +51 -0
- package/build/grammars/whitespace.js +30 -0
- package/build/grammars/whitespace.js.map +1 -0
- package/build/grammars/whitespace.json +1 -0
- package/build/index.d.ts +4 -0
- package/build/index.js +21 -0
- package/build/index.js.map +1 -0
- package/build/lexers/character-lexer.d.ts +27 -0
- package/build/lexers/character-lexer.js +71 -0
- package/build/lexers/character-lexer.js.map +1 -0
- package/build/lexers/stateful-lexer.d.ts +48 -0
- package/build/lexers/stateful-lexer.js +309 -0
- package/build/lexers/stateful-lexer.js.map +1 -0
- package/build/lexers/token-buffer.d.ts +32 -0
- package/build/lexers/token-buffer.js +92 -0
- package/build/lexers/token-buffer.js.map +1 -0
- package/build/parser/algorithms/cyk.d.ts +16 -0
- package/build/parser/algorithms/cyk.js +58 -0
- package/build/parser/algorithms/cyk.js.map +1 -0
- package/build/parser/algorithms/earley.d.ts +48 -0
- package/build/parser/algorithms/earley.js +158 -0
- package/build/parser/algorithms/earley.js.map +1 -0
- package/build/parser/algorithms/lr.d.ts +10 -0
- package/build/parser/algorithms/lr.js +34 -0
- package/build/parser/algorithms/lr.js.map +1 -0
- package/build/parser/parser.d.ts +26 -0
- package/build/parser/parser.js +74 -0
- package/build/parser/parser.js.map +1 -0
- package/build/typings.d.ts +198 -0
- package/build/typings.js +3 -0
- package/build/typings.js.map +1 -0
- package/build/utility/general.d.ts +46 -0
- package/build/utility/general.js +112 -0
- package/build/utility/general.js.map +1 -0
- package/build/utility/lint.d.ts +2 -0
- package/build/utility/lint.js +28 -0
- package/build/utility/lint.js.map +1 -0
- package/build/utility/lr.d.ts +56 -0
- package/build/utility/lr.js +131 -0
- package/build/utility/lr.js.map +1 -0
- package/build/utility/text-format.d.ts +11 -0
- package/build/utility/text-format.js +84 -0
- package/build/utility/text-format.js.map +1 -0
- package/licenses/LICENSE.txt +165 -0
- package/licenses/moo.license +29 -0
- package/licenses/nearley.license +21 -0
- package/package.json +52 -0
- package/src/compiler/compiler.ts +239 -0
- package/src/compiler/generator.ts +229 -0
- package/src/compiler/import-resolver.ts +36 -0
- package/src/compiler/outputs/javascript.ts +27 -0
- package/src/compiler/outputs/json.ts +5 -0
- package/src/compiler/outputs/typescript.ts +105 -0
- package/src/grammars/gwell.gwell +278 -0
- package/src/grammars/gwell.js +539 -0
- package/src/grammars/gwell.json +1 -0
- package/src/grammars/json.gwell +75 -0
- package/src/grammars/json.js +121 -0
- package/src/grammars/json.json +1 -0
- package/src/grammars/number.gwell +20 -0
- package/src/grammars/number.js +117 -0
- package/src/grammars/number.json +1 -0
- package/src/grammars/string.gwell +15 -0
- package/src/grammars/string.js +52 -0
- package/src/grammars/string.json +1 -0
- package/src/grammars/whitespace.gwell +6 -0
- package/src/grammars/whitespace.js +32 -0
- package/src/grammars/whitespace.json +1 -0
- package/src/index.ts +4 -0
- package/src/lexers/character-lexer.ts +73 -0
- package/src/lexers/stateful-lexer.ts +335 -0
- package/src/lexers/token-buffer.ts +102 -0
- package/src/parser/algorithms/cyk.ts +74 -0
- package/src/parser/algorithms/earley.ts +193 -0
- package/src/parser/algorithms/lr.ts +37 -0
- package/src/parser/parser.ts +77 -0
- package/src/typings.ts +221 -0
- package/src/utility/general.ts +120 -0
- package/src/utility/lint.ts +26 -0
- package/src/utility/lr.ts +153 -0
- package/src/utility/text-format.ts +84 -0
- package/testing.ts +18 -0
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
import { CompiledStateDefinition, LexerStateMatchRule, ResolvedStateDefinition, LexerStateDefinition, LexerConfig } from "../typings";
|
|
2
|
+
|
|
3
|
+
/**
 * A state-machine lexer (see licenses/moo.license — this appears to be
 * derived from moo; TODO confirm). Each named state owns one compiled
 * alternation RegExp built from its rules; matching a rule may switch,
 * push, or pop states (see processRule).
 */
export class StatefulLexer {
    private start: string;                  // name of the initial state
    private states: { [key: string]: CompiledStateDefinition } = Object.create(null);
    private buffer: string;                 // text currently being lexed
    private stack: string[];                // state names saved by goto()
    private index: number;                  // current offset into buffer
    private line: number;                   // 1-based line of the next token
    private column: number;                 // 1-based column of the next token
    private prefetched?: RegExpExecArray;   // match found beyond an unmatched gap, replayed on the next call
    private current: string;                // name of the active state
    private unmatched: LexerStateMatchRule; // fallback rule of the active state (null when the state is strict)
    private rules: LexerStateMatchRule[];   // rules of the active state
    private regexp: RegExp;                 // compiled alternation for the active state
    private tags = new Map<string[], Set<string>>(); // cache keyed by rule.tag ARRAY IDENTITY, not content

    constructor({ states, start }: LexerConfig) {
        // Flatten rule imports and prune unreachable states, then compile
        // each surviving state's rules into a single alternation RegExp.
        ResolveStates(states, start);
        for (const key in states) {
            this.states[key] = {
                regexp: CompileRegExp(states[key] as ResolvedStateDefinition),
                rules: states[key].rules as LexerStateMatchRule[],
                unmatched: states[key].unmatched ? { type: states[key].unmatched } as LexerStateMatchRule : null
            };
        }
        this.start = start;
        this.buffer = '';
        this.stack = [];
        this.feed();
    }

    /**
     * Load a chunk of input. Passing a previously captured state()
     * continues line/column numbering, the active state, the state stack,
     * and any prefetched match across chunk boundaries.
     */
    feed(data?: string, state?: ReturnType<StatefulLexer['state']>) {
        this.buffer = data || '';
        this.index = 0;
        this.line = state ? state.line : 1;
        this.column = state ? state.column : 1;
        this.prefetched = state?.prefetched;
        this.set(state ? state.state : this.start);
        this.stack = state && state.stack ? state.stack.slice() : [];
    }

    /** Snapshot of the lexer position/state, suitable to pass back to feed(). */
    state() {
        return {
            line: this.line,
            column: this.column,
            state: this.current,
            stack: this.stack.slice(),
            prefetched: this.prefetched,
        }
    }

    /**
     * Produce the next token, or undefined at end of buffer.
     * Throws when text cannot be matched and the state has no
     * `unmatched` fallback rule.
     */
    next() {
        const next = this.matchNext();
        if (!next) {
            return
        }
        const { rule, text, index } = next;
        if (!rule) {
            throw new Error(`No matching rule for ${text}`);
        }
        // Create the token first (it captures the pre-advance position),
        // then apply any state transitions the rule requests.
        const token = this.createToken(rule, text, index)
        this.processRule(rule);
        return token;
    }

    /** Activate a state by name; no-op for a falsy name or the current state. */
    private set(current: string) {
        if (!current || this.current === current)
            return
        const info = this.states[current];
        this.current = current;
        this.rules = info.rules;
        this.unmatched = info.unmatched;
        this.regexp = info.regexp;
    }

    /** Return to the most recently pushed state. */
    private pop() {
        this.set(this.stack.pop());
    }

    /** Push the current state and switch to `state`. */
    private goto(state: string) {
        this.stack.push(this.current)
        this.set(state)
    }

    /**
     * Find the next rule match at the current index. When the regexp
     * matches later than `index` (only possible with the 'g' flag, i.e.
     * when the state has an `unmatched` fallback), the gap is emitted as
     * an unmatched token and the real match is stashed in `prefetched`.
     */
    private matchNext() {
        if (this.index === this.buffer.length) {
            return;
        }

        const { index, buffer } = this;
        let text;
        let rule;
        let match;

        this.regexp.lastIndex = index;
        if (this.prefetched) {
            match = this.prefetched;
            this.prefetched = null;
        } else {
            match = this.regexp.exec(buffer)
        }
        if (match == null) {
            // Nothing matches from here to the end: the remainder is unmatched.
            rule = this.unmatched;
            text = buffer.slice(index, buffer.length);
        } else if (match.index !== index) {
            // Gap before the match: emit the gap now, replay the match next time.
            rule = this.unmatched;
            text = buffer.slice(index, match.index)
            this.prefetched = match;
        } else {
            rule = this.getGroup(match)
            text = match[0]
        }

        return { index, rule, text }
    }

    /** Build the token object and advance index/line/column past `text`. */
    private createToken(rule: LexerStateMatchRule, text: string, offset: number) {
        const token = {
            type: rule.type,
            tag: this.getTags(rule.tag),
            value: text,
            text: text,
            offset: offset,
            line: this.line,
            column: this.column,
            state: this.current
        }

        for (let i = 0; i < text.length; i++) {
            this.index++;
            this.column++;
            if (text[i] == '\n') {
                this.line++;
                this.column = 1;
            }
        }
        return token;
    }

    /**
     * Convert a rule's tag array into a Set, cached per array instance so
     * every token from the same rule shares one Set object.
     */
    private getTags(tags?: string[]) {
        if (!tags)
            return undefined;
        if (!this.tags.has(tags))
            this.tags.set(tags, new Set(tags));
        return this.tags.get(tags);
    }

    /** Apply the matched rule's state transitions: pop(s), set, goto, inset. */
    private processRule(rule: LexerStateMatchRule) {
        if (rule.pop) {
            let i = rule.pop === 'all' ? this.stack.length : rule.pop;
            while (i-- > 0) {
                this.pop();
            }
        }

        if (rule.set) {
            this.set(rule.set);
        }
        if (rule.goto) {
            this.goto(rule.goto);
        }

        if (rule.inset) {
            // Re-enter the current state `inset` times (deepens the stack).
            let i = rule.inset;
            while (--i >= 0) {
                this.goto(this.current);
            }
        }
    }

    /**
     * Map a regexp match to its rule: capture group i+1 corresponds to
     * rules[i], because CompileRegExp wraps each rule in one capture group.
     */
    private getGroup(match): LexerStateMatchRule {
        for (let i = 0; i < this.rules.length; i++) {
            if (match[i + 1] !== undefined) {
                return this.rules[i];
            }
        }
        throw new Error('Cannot find token type for matched text')
    }
}
|
|
181
|
+
|
|
182
|
+
class RegexLib {
|
|
183
|
+
|
|
184
|
+
static IsRegex(o: any) {
|
|
185
|
+
return o instanceof RegExp
|
|
186
|
+
}
|
|
187
|
+
static Escape(s: string) {
|
|
188
|
+
return s.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&')
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
static HasGroups(s: string) {
|
|
192
|
+
return (new RegExp('|' + s)).exec('').length > 1
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
static Capture(source: string) {
|
|
196
|
+
return '(' + source + ')'
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
static Join(regexps: string[]) {
|
|
200
|
+
if (!regexps.length)
|
|
201
|
+
return '(?!)';
|
|
202
|
+
const source = regexps.map((s) => `(?:${s})`).join('|');
|
|
203
|
+
return `(?:${source})`;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
static Source(search: string | RegExp) {
|
|
207
|
+
if (typeof search === 'string') {
|
|
208
|
+
return `(?:${RegexLib.Escape(search)})`;
|
|
209
|
+
}
|
|
210
|
+
if (RegexLib.IsRegex(search)) {
|
|
211
|
+
return search.source;
|
|
212
|
+
}
|
|
213
|
+
throw new Error('Not a pattern: ' + search)
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
/**
 * Compile a state's rules into one alternation RegExp, one capture group
 * per rule (getGroup relies on this ordering). Validates that the rules'
 * /u and /i flags are mutually consistent, that no rule matches the
 * empty string, and that no rule contains its own capture groups.
 *
 * The result is sticky ('y') for strict states and global ('g') for
 * states with an `unmatched` fallback, so exec() can skip ahead and the
 * gap can be reported as unmatched text.
 */
function CompileRegExp(state: ResolvedStateDefinition): RegExp {
    const rules = [];
    const subexpressions = [];

    // null = "not yet decided"; the first rule fixes each flag.
    let isUnicode = null;
    let isCI = null;
    for (const options of state.rules) {
        if (RegexLib.IsRegex(options.when)) {
            const when = options.when as RegExp;
            if (isUnicode === null) {
                isUnicode = when.unicode
            } else if (isUnicode !== when.unicode && !state.unmatched) {
                // NOTE(review): /u mismatches are tolerated when the state
                // has an `unmatched` fallback — presumably intentional; confirm.
                throw new Error(`Inconsistent Regex Flag /u in state: ${state.name}`);
            }
            if (isCI === null) {
                isCI = when.ignoreCase
            } else if (isCI !== when.ignoreCase) {
                throw new Error(`Inconsistent Regex Flag /i in state: ${state.name}`);
            }
        } else {
            // String literals are treated as case-sensitive.
            if (isCI == null) {
                isCI = false;
            } else if (isCI != false) {
                throw new Error(`Inconsistent Regex Flag /i in state: ${state.name}`);
            }
        }

        rules.push(options);
        const pat = RegexLib.Source(options.when);
        const regexp = new RegExp(pat)
        // An empty-matching rule would make the lexer loop forever.
        if (regexp.test("")) {
            throw new Error("RegExp matches empty string: " + regexp)
        }

        // User capture groups would shift getGroup's rule-index mapping.
        if (RegexLib.HasGroups(pat)) {
            throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
        }

        subexpressions.push(RegexLib.Capture(pat))
    }

    let flags = !state.unmatched ? 'ym' : 'gm';
    if (isUnicode === true)
        flags += "u"
    if (isCI === true)
        flags += "i"
    return new RegExp(RegexLib.Join(subexpressions), flags);
}
|
|
266
|
+
|
|
267
|
+
export function ResolveStates(states: { [key: string]: LexerStateDefinition }, start: string) {
|
|
268
|
+
|
|
269
|
+
const resolved = new Set<string>();
|
|
270
|
+
const resolving = new Set<string>();
|
|
271
|
+
const chain = new Set<string>();
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
ResolveRuleImports(start, states, resolved, resolving, chain);
|
|
275
|
+
for (const key in states) {
|
|
276
|
+
if (!resolved.has(key)) {
|
|
277
|
+
delete states[key];
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
return states;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
/**
 * Recursively inline `import` rules for state `name`, replacing its rule
 * list with a flattened, de-duplicated list. Also resolves states that
 * are merely referenced via `set`/`goto` (with a fresh chain, since those
 * are transitions rather than inlining and cannot form an import cycle).
 *
 * @param resolved  states whose rules are fully flattened
 * @param resolving states currently being processed (guards recursion)
 * @param chain     the active chain of `import` edges, for cycle detection
 * @throws on circular imports or references to unknown states
 */
function ResolveRuleImports(name: string, states: { [key: string]: LexerStateDefinition }, resolved: Set<string>, resolving: Set<string>, chain: Set<string>) {
    if (chain.has(name))
        throw new Error(`Can not resolve circular import of ${name}`);
    if (!states[name])
        throw new Error(`Can not import unknown state ${name}`);
    if (resolved.has(name) || resolving.has(name))
        return;
    const state = states[name];
    const rules = new UniqueRules();
    chain.add(name);
    resolving.add(name);
    for (let i = 0; i < state.rules.length; i++) {
        const rule = state.rules[i];
        if ("import" in rule) {
            // Inline the imported states' (already flattened) rules.
            for (const ref of rule.import) {
                ResolveRuleImports(ref, states, resolved, resolving, chain);
                rules.push(...states[ref].rules as LexerStateMatchRule[]);
            }
        } else {
            rules.push(rule);
            // set/goto targets must exist and be resolved too, but start a
            // new chain: a transition back to an ancestor is legal.
            if ("set" in rule && !resolving.has(rule.set)) {
                ResolveRuleImports(rule.set, states, resolved, resolving, new Set());
            }
            if ("goto" in rule && !resolving.has(rule.goto)) {
                ResolveRuleImports(rule.goto, states, resolved, resolving, new Set());
            }
        }
    }
    state.rules = rules.rules;
    chain.delete(name);
    resolved.add(name);
}
|
|
315
|
+
|
|
316
|
+
class UniqueRules {
|
|
317
|
+
private regexps = new Set<string>();
|
|
318
|
+
private strings = new Set<string>();
|
|
319
|
+
rules: LexerStateMatchRule[] = [];
|
|
320
|
+
|
|
321
|
+
push(...rules: LexerStateMatchRule[]) {
|
|
322
|
+
for (const rule of rules) {
|
|
323
|
+
if (RegexLib.IsRegex(rule.when)) {
|
|
324
|
+
if (!this.regexps.has((rule.when as RegExp).source)) {
|
|
325
|
+
this.rules.push(rule);
|
|
326
|
+
}
|
|
327
|
+
} else {
|
|
328
|
+
if (!this.strings.has(rule.when as string)) {
|
|
329
|
+
this.rules.push(rule);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { Lexer, TQRestorePoint, LexerToken } from '../typings';
|
|
2
|
+
|
|
3
|
+
export class TokenBuffer {
|
|
4
|
+
private history: LexerToken[] = [];
|
|
5
|
+
private queued: string = '';
|
|
6
|
+
|
|
7
|
+
private $historyIndex = -1;
|
|
8
|
+
|
|
9
|
+
get offset() { return this.active?.offset || 0 }
|
|
10
|
+
get line() { return this.active?.line || 0 }
|
|
11
|
+
get column() { return this.active?.column || 0; }
|
|
12
|
+
get active() { return this.history[this.$historyIndex]; }
|
|
13
|
+
|
|
14
|
+
get state(): TQRestorePoint {
|
|
15
|
+
return { historyIndex: this.$historyIndex, offset: this.offset };
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
constructor(private lexer: Lexer) { }
|
|
19
|
+
|
|
20
|
+
reset(buffer: string) {
|
|
21
|
+
this.lexer.feed(buffer);
|
|
22
|
+
this.history = [];
|
|
23
|
+
this.$historyIndex = -1;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
restore(state: TQRestorePoint) {
|
|
27
|
+
if (this.history[state.historyIndex].offset != state.offset) {
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
this.$historyIndex = state.historyIndex;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
feed(buffer: string, flush?: boolean) {
|
|
34
|
+
this.queued += buffer;
|
|
35
|
+
if (flush) {
|
|
36
|
+
this.flush();
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
flush() {
|
|
41
|
+
this.history = this.history.slice(this.$historyIndex);
|
|
42
|
+
this.$historyIndex = 0;
|
|
43
|
+
if (this.lexer.flush) {
|
|
44
|
+
this.lexer.flush()
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
previous() {
|
|
49
|
+
if (this.$historyIndex > 0) {
|
|
50
|
+
return this.history[--this.$historyIndex];
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
next() {
|
|
55
|
+
if (this.$historyIndex + 1 >= this.history.length) {
|
|
56
|
+
this.lexerNext();
|
|
57
|
+
}
|
|
58
|
+
if (this.$historyIndex + 1 < this.history.length) {
|
|
59
|
+
return this.history[++this.$historyIndex];
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
peek(offset: number) {
|
|
64
|
+
offset += this.$historyIndex;
|
|
65
|
+
while ((offset >= this.history.length) && this.lexerNext()) {
|
|
66
|
+
// Seeking
|
|
67
|
+
}
|
|
68
|
+
if (offset >= 0 && offset < this.history.length)
|
|
69
|
+
return this.history[offset];
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
private lexerNext() {
|
|
73
|
+
let token = this.lexer.next();
|
|
74
|
+
|
|
75
|
+
if (typeof token === 'undefined' && this.queued) {
|
|
76
|
+
this.lexer.feed(this.queued, this.$historyIndex >= 0 ? this.lexer.state() : undefined);
|
|
77
|
+
this.queued = '';
|
|
78
|
+
token = this.lexer.next();
|
|
79
|
+
}
|
|
80
|
+
if (token)
|
|
81
|
+
this.history.push(token);
|
|
82
|
+
return token;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
[Symbol.iterator]() {
|
|
86
|
+
return new TokenIterator(this)
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
class TokenIterator {
|
|
92
|
+
constructor(private buffer: TokenBuffer) { }
|
|
93
|
+
|
|
94
|
+
next() {
|
|
95
|
+
const token = this.buffer.next()
|
|
96
|
+
return { value: token, done: !token }
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
[Symbol.iterator]() {
|
|
100
|
+
return this
|
|
101
|
+
}
|
|
102
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { TokenBuffer } from "../../lexers/token-buffer";
|
|
2
|
+
import { GrammarRule, GrammarRuleSymbol, LanguageDefinition, LexerToken } from "../../typings";
|
|
3
|
+
import { Matrix } from "../../utility/general";
|
|
4
|
+
import { ParserUtility } from "../parser";
|
|
5
|
+
|
|
6
|
+
/**
 * CYK parse over the token stream. Builds the chart incrementally: after
 * consuming token k it fills column k of the upper-triangular chart.
 * Rules are split by whether their first symbol is terminal; binary
 * rules are destructured as [leftSymbol, rightSymbol], so the grammar
 * appears to be assumed to be in Chomsky normal form — TODO confirm.
 */
export function CYK(language: LanguageDefinition & { tokens: TokenBuffer }, _options = {}) {
    const { grammar, tokens } = language;

    const terminals: GrammarRule[] = [];
    const nonTerminals: GrammarRule[] = [];

    // Partition rules once up front.
    for (const name in grammar.rules) {
        for (const rule of grammar.rules[name]) {
            const { symbols } = rule;
            if (ParserUtility.SymbolIsTerminal(symbols[0])) {
                terminals.push(rule);
            } else {
                nonTerminals.push(rule);
            }
        }
    }

    let currentTokenIndex = -1;
    // chart.get(i, j): derivations spanning tokens i..j, keyed by rule name.
    const chart = new Matrix(0, 0, () => new Map<GrammarRuleSymbol, Terminal | NonTerminal>());
    for (const token of tokens) {
        currentTokenIndex++;
        chart.resize(currentTokenIndex + 2, currentTokenIndex + 2);
        // Diagonal: terminal rules matching this single token.
        for (const rule of terminals) {
            if (ParserUtility.TokenMatchesSymbol(token, rule.symbols[0])) {
                chart.get(currentTokenIndex, currentTokenIndex).set(rule.name, { rule, token })
            }
        }


        // Fill the new column: every span ending at the current token,
        // widest start (floor) moving toward 0, split point = inner.
        for (let floor = currentTokenIndex; floor >= 0; floor--) {
            for (let inner = floor; inner <= currentTokenIndex; inner++) {
                const leftCell = chart.get(floor, inner);
                const rightCell = chart.get(inner + 1, currentTokenIndex);

                for (const rule of nonTerminals) {
                    const { symbols: [leftSymbol, rightSymbol] } = rule;
                    const left: Terminal | NonTerminal = leftCell.get(leftSymbol);
                    const right: Terminal | NonTerminal = rightCell.get(rightSymbol);
                    if (left && right) {
                        chart.get(floor, currentTokenIndex).set(rule.name, { rule, left, right });
                    }
                }
            }
        }
    }

    // Full-span derivations (tokens 0..last) are the parse results.
    const results = Array.from(chart.get(0, currentTokenIndex).values()).map(v => GetValue(v));
    return { results };
}
|
|
55
|
+
|
|
56
|
+
function GetValue(ref: Terminal | NonTerminal) {
|
|
57
|
+
if (!ref)
|
|
58
|
+
return;
|
|
59
|
+
if ('token' in ref) {
|
|
60
|
+
return ParserUtility.PostProcess(ref.rule, [ref.token]);
|
|
61
|
+
}
|
|
62
|
+
return ParserUtility.PostProcess(ref.rule, [GetValue(ref.left), GetValue(ref.right)])
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/** Binary CYK chart entry: a rule applied to two sub-derivations. */
export interface NonTerminal {
    rule: GrammarRule;
    left: NonTerminal | Terminal;
    right: NonTerminal | Terminal;
}
|
|
70
|
+
|
|
71
|
+
/** Leaf CYK chart entry: a rule matched directly against one token. */
export interface Terminal {
    rule: GrammarRule;
    token: LexerToken;
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import { Dictionary, GrammarRule, LanguageDefinition } from "../../typings";
|
|
2
|
+
import { TokenBuffer } from "../../lexers/token-buffer";
|
|
3
|
+
import { TextFormatter } from "../../utility/text-format";
|
|
4
|
+
import { ParserUtility } from "../parser";
|
|
5
|
+
|
|
6
|
+
export interface EarleyParserOptions {
    // Keep every chart column in the parse table (useful for debugging,
    // memory-heavy). When false/omitted, columns older than the previous
    // one are discarded as parsing advances.
    keepHistory?: boolean;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Earley parse over the token stream (structure closely follows nearley
 * — see licenses/nearley.license). One chart Column per token position;
 * each token scans the previous column's scannable states into the next
 * column, which is then closed under prediction and completion.
 *
 * @throws a formatted "unexpected token" error when a column ends empty.
 */
export function Earley(language: LanguageDefinition & { tokens: TokenBuffer }, options: EarleyParserOptions = {}) {
    const { tokens } = language;
    const { rules, start } = language.grammar;
    // Seed column 0 with predictions for the start symbol.
    const column = new Column(rules, 0);
    const table: Column[] = [column];
    column.wants[start] = [];
    column.predict(start);
    column.process();

    let current: number = 0;

    for (const token of tokens) {
        const previousColumn: Column = table[current];

        // Free old columns unless the caller wants the full history.
        if (!(options.keepHistory)) {
            delete table[current - 1];
        }

        current++;

        const nextColumn = new Column(rules, current);
        table.push(nextColumn);

        const literal = token.value;
        const data = token;
        nextColumn.data = literal;
        // Scan: advance every state that was waiting on a terminal
        // matching this token.
        const { scannable } = previousColumn;
        for (let w = scannable.length; w--;) {
            const state = scannable[w];
            const symbol = state.rule.symbols[state.dot];
            if (ParserUtility.TokenMatchesSymbol(token, symbol)) {
                const next = state.nextState({ data, token, isToken: true, reference: current - 1 });
                nextColumn.states.push(next);
            }
        }

        nextColumn.process();

        // No state survived the scan: the token cannot follow what came before.
        if (nextColumn.states.length === 0) {
            throw TextFormatter.UnexpectedToken(tokens, previousColumn.expects());
        }
    }

    // Accept states: completed start-rule states anchored at position 0.
    const results = [];
    const { states } = table[table.length - 1];
    for (const { rule: { name, symbols }, dot, reference, data } of states) {
        if (name === start && dot === symbols.length && reference == 0) {
            results.push(data);
        }
    }
    return { results, info: { table } };
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
/**
 * One Earley chart column: all parser states ending at token `index`.
 * process() closes the column under prediction and completion; states
 * are appended to `states` while it is being iterated.
 */
class Column {
    data: any;
    states: State[] = [];
    wants: Dictionary<State[]> = Object.create(null);// states indexed by the non-terminal they expect
    scannable: State[] = [];// list of states that expect a token
    completed: Dictionary<State[]> = Object.create(null); // states that are nullable

    constructor(
        private rules: Dictionary<GrammarRule[]>,
        public index: number
    ) { }


    /**
     * Drain the state queue: completed states trigger completion of the
     * states that wanted them; incomplete states either become scannable
     * (next symbol is a terminal) or predict their expected non-terminal.
     */
    process() {
        let w = 0;
        let state: State;

        // eslint-disable-next-line no-cond-assign
        while (state = this.states[w++]) { // nb. we push() during iteration
            if (state.isComplete) {
                state.finish();
                const { wantedBy } = state;
                for (let i = wantedBy.length; i--;) { // this line is hot
                    this.complete(wantedBy[i], state);
                }

                // special-case nullables
                if (state.reference === this.index) {
                    // Completed within this very column => derives empty input;
                    // remember it so later predictions can complete against it.
                    const { name } = state.rule;
                    this.completed[name] = this.completed[name] || [];
                    this.completed[name].push(state);
                }
            } else {
                const exp = state.rule.symbols[state.dot];
                // Non-string symbols are terminals here: defer to the scanner.
                if (typeof exp !== 'string') {
                    this.scannable.push(state);
                    continue;
                }

                // predict
                if (this.wants[exp]) {
                    // Already predicted: just register interest, and replay any
                    // nullable completions that happened before this state arrived.
                    this.wants[exp].push(state);

                    if (this.completed[exp]) {
                        for (const right of this.completed[exp]) {
                            this.complete(state, right);
                        }
                    }
                } else {
                    this.wants[exp] = [state];
                    this.predict(exp);
                }
            }
        }
    }

    /** Add fresh dot-0 states for every rule of non-terminal `exp`. */
    predict(exp: string) {
        if (!this.rules[exp])
            return;

        for (const rule of this.rules[exp]) {
            this.states.push(new State(rule, 0, this.index, this.wants[exp]));
        }
    }

    /** Rules (with dot position) that were waiting on a terminal — for error reporting. */
    expects(): GrammarRule[] {
        return this.states
            .filter((state) => {
                const nextSymbol = state.rule.symbols[state.dot];
                return nextSymbol && typeof nextSymbol !== "string";
            })
            .map(v => ({ ...v.rule, index: v.dot }));
    }

    /** Advance `left` past the non-terminal that `right` just completed. */
    private complete(left: State, right: State) {
        const copy = left.nextState(right);
        this.states.push(copy);
    }


}
|
|
145
|
+
|
|
146
|
+
/**
 * One Earley item: `rule` with the dot at `dot`, started at column
 * `reference`. Advanced states form a left-linked chain (left = previous
 * state of the same rule, right = the child consumed at that step),
 * which build() walks to assemble the children array.
 */
class State {
    isComplete: boolean;          // dot has reached the end of rule.symbols
    data: any = [];               // children while parsing; post-processed value after finish()
    left: State;                  // previous state in this rule's chain
    right: State | StateToken;    // child consumed by the last advance (cleared once built)
    constructor(
        public rule: GrammarRule,
        public dot: number,
        public reference: number,
        public wantedBy: State[]
    ) {
        this.isComplete = this.dot === rule.symbols.length;
    }

    /** Advance the dot past `child`, materializing data when the rule completes. */
    nextState(child: State | StateToken) {
        const state = new State(this.rule, this.dot + 1, this.reference, this.wantedBy);
        state.left = this;
        state.right = child;
        if (state.isComplete) {
            state.data = state.build();
            // Drop the right link so completed subtrees can be collected.
            state.right = undefined;
        }
        return state;
    }


    /** Apply the rule's postprocessor to the assembled children. */
    finish() {
        this.data = ParserUtility.PostProcess(this.rule, this.data, { reference: this.reference, dot: this.dot });
    }

    /** Walk the left-chain backwards, placing each step's child at dot-1. */
    protected build() {
        const children = [];
        // eslint-disable-next-line @typescript-eslint/no-this-alias
        let node: State = this;
        do {
            children[node.dot - 1] = node.right.data;
            node = node.left;
        } while (node.left);
        return children;
    }
}
|
|
187
|
+
|
|
188
|
+
/** Leaf attached to a State when a terminal is scanned (see Earley's scan loop). */
interface StateToken {
    data: any,
    token: any,
    isToken: boolean,
    reference: number
}
|