grammar-well 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.eslintrc.cjs +14 -0
  2. package/README.md +288 -0
  3. package/bootstrap.ts +35 -0
  4. package/build/compiler/compiler.d.ts +48 -0
  5. package/build/compiler/compiler.js +227 -0
  6. package/build/compiler/compiler.js.map +1 -0
  7. package/build/compiler/generator.d.ts +23 -0
  8. package/build/compiler/generator.js +213 -0
  9. package/build/compiler/generator.js.map +1 -0
  10. package/build/compiler/import-resolver.d.ts +15 -0
  11. package/build/compiler/import-resolver.js +37 -0
  12. package/build/compiler/import-resolver.js.map +1 -0
  13. package/build/compiler/outputs/javascript.d.ts +3 -0
  14. package/build/compiler/outputs/javascript.js +29 -0
  15. package/build/compiler/outputs/javascript.js.map +1 -0
  16. package/build/compiler/outputs/json.d.ts +2 -0
  17. package/build/compiler/outputs/json.js +8 -0
  18. package/build/compiler/outputs/json.js.map +1 -0
  19. package/build/compiler/outputs/typescript.d.ts +2 -0
  20. package/build/compiler/outputs/typescript.js +108 -0
  21. package/build/compiler/outputs/typescript.js.map +1 -0
  22. package/build/grammars/gwell.d.ts +997 -0
  23. package/build/grammars/gwell.js +537 -0
  24. package/build/grammars/gwell.js.map +1 -0
  25. package/build/grammars/json.d.ts +151 -0
  26. package/build/grammars/json.js +112 -0
  27. package/build/grammars/json.js.map +1 -0
  28. package/build/grammars/number.d.ts +239 -0
  29. package/build/grammars/number.js +115 -0
  30. package/build/grammars/number.js.map +1 -0
  31. package/build/grammars/number.json +1 -0
  32. package/build/grammars/string.d.ts +116 -0
  33. package/build/grammars/string.js +50 -0
  34. package/build/grammars/string.js.map +1 -0
  35. package/build/grammars/string.json +1 -0
  36. package/build/grammars/whitespace.d.ts +51 -0
  37. package/build/grammars/whitespace.js +30 -0
  38. package/build/grammars/whitespace.js.map +1 -0
  39. package/build/grammars/whitespace.json +1 -0
  40. package/build/index.d.ts +4 -0
  41. package/build/index.js +21 -0
  42. package/build/index.js.map +1 -0
  43. package/build/lexers/character-lexer.d.ts +27 -0
  44. package/build/lexers/character-lexer.js +71 -0
  45. package/build/lexers/character-lexer.js.map +1 -0
  46. package/build/lexers/stateful-lexer.d.ts +48 -0
  47. package/build/lexers/stateful-lexer.js +309 -0
  48. package/build/lexers/stateful-lexer.js.map +1 -0
  49. package/build/lexers/token-buffer.d.ts +32 -0
  50. package/build/lexers/token-buffer.js +92 -0
  51. package/build/lexers/token-buffer.js.map +1 -0
  52. package/build/parser/algorithms/cyk.d.ts +16 -0
  53. package/build/parser/algorithms/cyk.js +58 -0
  54. package/build/parser/algorithms/cyk.js.map +1 -0
  55. package/build/parser/algorithms/earley.d.ts +48 -0
  56. package/build/parser/algorithms/earley.js +158 -0
  57. package/build/parser/algorithms/earley.js.map +1 -0
  58. package/build/parser/algorithms/lr.d.ts +10 -0
  59. package/build/parser/algorithms/lr.js +34 -0
  60. package/build/parser/algorithms/lr.js.map +1 -0
  61. package/build/parser/parser.d.ts +26 -0
  62. package/build/parser/parser.js +74 -0
  63. package/build/parser/parser.js.map +1 -0
  64. package/build/typings.d.ts +198 -0
  65. package/build/typings.js +3 -0
  66. package/build/typings.js.map +1 -0
  67. package/build/utility/general.d.ts +46 -0
  68. package/build/utility/general.js +112 -0
  69. package/build/utility/general.js.map +1 -0
  70. package/build/utility/lint.d.ts +2 -0
  71. package/build/utility/lint.js +28 -0
  72. package/build/utility/lint.js.map +1 -0
  73. package/build/utility/lr.d.ts +56 -0
  74. package/build/utility/lr.js +131 -0
  75. package/build/utility/lr.js.map +1 -0
  76. package/build/utility/text-format.d.ts +11 -0
  77. package/build/utility/text-format.js +84 -0
  78. package/build/utility/text-format.js.map +1 -0
  79. package/licenses/LICENSE.txt +165 -0
  80. package/licenses/moo.license +29 -0
  81. package/licenses/nearley.license +21 -0
  82. package/package.json +52 -0
  83. package/src/compiler/compiler.ts +239 -0
  84. package/src/compiler/generator.ts +229 -0
  85. package/src/compiler/import-resolver.ts +36 -0
  86. package/src/compiler/outputs/javascript.ts +27 -0
  87. package/src/compiler/outputs/json.ts +5 -0
  88. package/src/compiler/outputs/typescript.ts +105 -0
  89. package/src/grammars/gwell.gwell +278 -0
  90. package/src/grammars/gwell.js +539 -0
  91. package/src/grammars/gwell.json +1 -0
  92. package/src/grammars/json.gwell +75 -0
  93. package/src/grammars/json.js +121 -0
  94. package/src/grammars/json.json +1 -0
  95. package/src/grammars/number.gwell +20 -0
  96. package/src/grammars/number.js +117 -0
  97. package/src/grammars/number.json +1 -0
  98. package/src/grammars/string.gwell +15 -0
  99. package/src/grammars/string.js +52 -0
  100. package/src/grammars/string.json +1 -0
  101. package/src/grammars/whitespace.gwell +6 -0
  102. package/src/grammars/whitespace.js +32 -0
  103. package/src/grammars/whitespace.json +1 -0
  104. package/src/index.ts +4 -0
  105. package/src/lexers/character-lexer.ts +73 -0
  106. package/src/lexers/stateful-lexer.ts +335 -0
  107. package/src/lexers/token-buffer.ts +102 -0
  108. package/src/parser/algorithms/cyk.ts +74 -0
  109. package/src/parser/algorithms/earley.ts +193 -0
  110. package/src/parser/algorithms/lr.ts +37 -0
  111. package/src/parser/parser.ts +77 -0
  112. package/src/typings.ts +221 -0
  113. package/src/utility/general.ts +120 -0
  114. package/src/utility/lint.ts +26 -0
  115. package/src/utility/lr.ts +153 -0
  116. package/src/utility/text-format.ts +84 -0
  117. package/testing.ts +18 -0
@@ -0,0 +1,335 @@
1
+ import { CompiledStateDefinition, LexerStateMatchRule, ResolvedStateDefinition, LexerStateDefinition, LexerConfig } from "../typings";
2
+
3
+ export class StatefulLexer {
4
+ private start: string;
5
+ private states: { [key: string]: CompiledStateDefinition } = Object.create(null);
6
+ private buffer: string;
7
+ private stack: string[];
8
+ private index: number;
9
+ private line: number;
10
+ private column: number;
11
+ private prefetched?: RegExpExecArray;
12
+ private current: string;
13
+ private unmatched: LexerStateMatchRule;
14
+ private rules: LexerStateMatchRule[];
15
+ private regexp: RegExp;
16
+ private tags = new Map<string[], Set<string>>();
17
+
18
+ constructor({ states, start }: LexerConfig) {
19
+ ResolveStates(states, start);
20
+ for (const key in states) {
21
+ this.states[key] = {
22
+ regexp: CompileRegExp(states[key] as ResolvedStateDefinition),
23
+ rules: states[key].rules as LexerStateMatchRule[],
24
+ unmatched: states[key].unmatched ? { type: states[key].unmatched } as LexerStateMatchRule : null
25
+ };
26
+ }
27
+ this.start = start;
28
+ this.buffer = '';
29
+ this.stack = [];
30
+ this.feed();
31
+ }
32
+
33
+ feed(data?: string, state?: ReturnType<StatefulLexer['state']>) {
34
+ this.buffer = data || '';
35
+ this.index = 0;
36
+ this.line = state ? state.line : 1;
37
+ this.column = state ? state.column : 1;
38
+ this.prefetched = state?.prefetched;
39
+ this.set(state ? state.state : this.start);
40
+ this.stack = state && state.stack ? state.stack.slice() : [];
41
+ }
42
+
43
+ state() {
44
+ return {
45
+ line: this.line,
46
+ column: this.column,
47
+ state: this.current,
48
+ stack: this.stack.slice(),
49
+ prefetched: this.prefetched,
50
+ }
51
+ }
52
+
53
+ next() {
54
+ const next = this.matchNext();
55
+ if (!next) {
56
+ return
57
+ }
58
+ const { rule, text, index } = next;
59
+ if (!rule) {
60
+ throw new Error(`No matching rule for ${text}`);
61
+ }
62
+ const token = this.createToken(rule, text, index)
63
+ this.processRule(rule);
64
+ return token;
65
+ }
66
+
67
+ private set(current: string) {
68
+ if (!current || this.current === current)
69
+ return
70
+ const info = this.states[current];
71
+ this.current = current;
72
+ this.rules = info.rules;
73
+ this.unmatched = info.unmatched;
74
+ this.regexp = info.regexp;
75
+ }
76
+
77
+ private pop() {
78
+ this.set(this.stack.pop());
79
+ }
80
+
81
+ private goto(state: string) {
82
+ this.stack.push(this.current)
83
+ this.set(state)
84
+ }
85
+
86
+ private matchNext() {
87
+ if (this.index === this.buffer.length) {
88
+ return;
89
+ }
90
+
91
+ const { index, buffer } = this;
92
+ let text;
93
+ let rule;
94
+ let match;
95
+
96
+ this.regexp.lastIndex = index;
97
+ if (this.prefetched) {
98
+ match = this.prefetched;
99
+ this.prefetched = null;
100
+ } else {
101
+ match = this.regexp.exec(buffer)
102
+ }
103
+ if (match == null) {
104
+ rule = this.unmatched;
105
+ text = buffer.slice(index, buffer.length);
106
+ } else if (match.index !== index) {
107
+ rule = this.unmatched;
108
+ text = buffer.slice(index, match.index)
109
+ this.prefetched = match;
110
+ } else {
111
+ rule = this.getGroup(match)
112
+ text = match[0]
113
+ }
114
+
115
+ return { index, rule, text }
116
+ }
117
+
118
+ private createToken(rule: LexerStateMatchRule, text: string, offset: number) {
119
+ const token = {
120
+ type: rule.type,
121
+ tag: this.getTags(rule.tag),
122
+ value: text,
123
+ text: text,
124
+ offset: offset,
125
+ line: this.line,
126
+ column: this.column,
127
+ state: this.current
128
+ }
129
+
130
+ for (let i = 0; i < text.length; i++) {
131
+ this.index++;
132
+ this.column++;
133
+ if (text[i] == '\n') {
134
+ this.line++;
135
+ this.column = 1;
136
+ }
137
+ }
138
+ return token;
139
+ }
140
+
141
+ private getTags(tags?: string[]) {
142
+ if (!tags)
143
+ return undefined;
144
+ if (!this.tags.has(tags))
145
+ this.tags.set(tags, new Set(tags));
146
+ return this.tags.get(tags);
147
+ }
148
+
149
+ private processRule(rule: LexerStateMatchRule) {
150
+ if (rule.pop) {
151
+ let i = rule.pop === 'all' ? this.stack.length : rule.pop;
152
+ while (i-- > 0) {
153
+ this.pop();
154
+ }
155
+ }
156
+
157
+ if (rule.set) {
158
+ this.set(rule.set);
159
+ }
160
+ if (rule.goto) {
161
+ this.goto(rule.goto);
162
+ }
163
+
164
+ if (rule.inset) {
165
+ let i = rule.inset;
166
+ while (--i >= 0) {
167
+ this.goto(this.current);
168
+ }
169
+ }
170
+ }
171
+
172
+ private getGroup(match): LexerStateMatchRule {
173
+ for (let i = 0; i < this.rules.length; i++) {
174
+ if (match[i + 1] !== undefined) {
175
+ return this.rules[i];
176
+ }
177
+ }
178
+ throw new Error('Cannot find token type for matched text')
179
+ }
180
+ }
181
+
182
+ class RegexLib {
183
+
184
+ static IsRegex(o: any) {
185
+ return o instanceof RegExp
186
+ }
187
+ static Escape(s: string) {
188
+ return s.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&')
189
+ }
190
+
191
+ static HasGroups(s: string) {
192
+ return (new RegExp('|' + s)).exec('').length > 1
193
+ }
194
+
195
+ static Capture(source: string) {
196
+ return '(' + source + ')'
197
+ }
198
+
199
+ static Join(regexps: string[]) {
200
+ if (!regexps.length)
201
+ return '(?!)';
202
+ const source = regexps.map((s) => `(?:${s})`).join('|');
203
+ return `(?:${source})`;
204
+ }
205
+
206
+ static Source(search: string | RegExp) {
207
+ if (typeof search === 'string') {
208
+ return `(?:${RegexLib.Escape(search)})`;
209
+ }
210
+ if (RegexLib.IsRegex(search)) {
211
+ return search.source;
212
+ }
213
+ throw new Error('Not a pattern: ' + search)
214
+ }
215
+
216
+ }
217
+
218
+ function CompileRegExp(state: ResolvedStateDefinition): RegExp {
219
+ const rules = [];
220
+ const subexpressions = [];
221
+
222
+ let isUnicode = null;
223
+ let isCI = null;
224
+ for (const options of state.rules) {
225
+ if (RegexLib.IsRegex(options.when)) {
226
+ const when = options.when as RegExp;
227
+ if (isUnicode === null) {
228
+ isUnicode = when.unicode
229
+ } else if (isUnicode !== when.unicode && !state.unmatched) {
230
+ throw new Error(`Inconsistent Regex Flag /u in state: ${state.name}`);
231
+ }
232
+ if (isCI === null) {
233
+ isCI = when.ignoreCase
234
+ } else if (isCI !== when.ignoreCase) {
235
+ throw new Error(`Inconsistent Regex Flag /i in state: ${state.name}`);
236
+ }
237
+ } else {
238
+ if (isCI == null) {
239
+ isCI = false;
240
+ } else if (isCI != false) {
241
+ throw new Error(`Inconsistent Regex Flag /i in state: ${state.name}`);
242
+ }
243
+ }
244
+
245
+ rules.push(options);
246
+ const pat = RegexLib.Source(options.when);
247
+ const regexp = new RegExp(pat)
248
+ if (regexp.test("")) {
249
+ throw new Error("RegExp matches empty string: " + regexp)
250
+ }
251
+
252
+ if (RegexLib.HasGroups(pat)) {
253
+ throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
254
+ }
255
+
256
+ subexpressions.push(RegexLib.Capture(pat))
257
+ }
258
+
259
+ let flags = !state.unmatched ? 'ym' : 'gm';
260
+ if (isUnicode === true)
261
+ flags += "u"
262
+ if (isCI === true)
263
+ flags += "i"
264
+ return new RegExp(RegexLib.Join(subexpressions), flags);
265
+ }
266
+
267
+ export function ResolveStates(states: { [key: string]: LexerStateDefinition }, start: string) {
268
+
269
+ const resolved = new Set<string>();
270
+ const resolving = new Set<string>();
271
+ const chain = new Set<string>();
272
+
273
+
274
+ ResolveRuleImports(start, states, resolved, resolving, chain);
275
+ for (const key in states) {
276
+ if (!resolved.has(key)) {
277
+ delete states[key];
278
+ }
279
+ }
280
+ return states;
281
+ }
282
+
283
+ function ResolveRuleImports(name: string, states: { [key: string]: LexerStateDefinition }, resolved: Set<string>, resolving: Set<string>, chain: Set<string>) {
284
+ if (chain.has(name))
285
+ throw new Error(`Can not resolve circular import of ${name}`);
286
+ if (!states[name])
287
+ throw new Error(`Can not import unknown state ${name}`);
288
+ if (resolved.has(name) || resolving.has(name))
289
+ return;
290
+ const state = states[name];
291
+ const rules = new UniqueRules();
292
+ chain.add(name);
293
+ resolving.add(name);
294
+ for (let i = 0; i < state.rules.length; i++) {
295
+ const rule = state.rules[i];
296
+ if ("import" in rule) {
297
+ for (const ref of rule.import) {
298
+ ResolveRuleImports(ref, states, resolved, resolving, chain);
299
+ rules.push(...states[ref].rules as LexerStateMatchRule[]);
300
+ }
301
+ } else {
302
+ rules.push(rule);
303
+ if ("set" in rule && !resolving.has(rule.set)) {
304
+ ResolveRuleImports(rule.set, states, resolved, resolving, new Set());
305
+ }
306
+ if ("goto" in rule && !resolving.has(rule.goto)) {
307
+ ResolveRuleImports(rule.goto, states, resolved, resolving, new Set());
308
+ }
309
+ }
310
+ }
311
+ state.rules = rules.rules;
312
+ chain.delete(name);
313
+ resolved.add(name);
314
+ }
315
+
316
+ class UniqueRules {
317
+ private regexps = new Set<string>();
318
+ private strings = new Set<string>();
319
+ rules: LexerStateMatchRule[] = [];
320
+
321
+ push(...rules: LexerStateMatchRule[]) {
322
+ for (const rule of rules) {
323
+ if (RegexLib.IsRegex(rule.when)) {
324
+ if (!this.regexps.has((rule.when as RegExp).source)) {
325
+ this.rules.push(rule);
326
+ }
327
+ } else {
328
+ if (!this.strings.has(rule.when as string)) {
329
+ this.rules.push(rule);
330
+ }
331
+ }
332
+ }
333
+ }
334
+
335
+ }
@@ -0,0 +1,102 @@
1
+ import { Lexer, TQRestorePoint, LexerToken } from '../typings';
2
+
3
+ export class TokenBuffer {
4
+ private history: LexerToken[] = [];
5
+ private queued: string = '';
6
+
7
+ private $historyIndex = -1;
8
+
9
+ get offset() { return this.active?.offset || 0 }
10
+ get line() { return this.active?.line || 0 }
11
+ get column() { return this.active?.column || 0; }
12
+ get active() { return this.history[this.$historyIndex]; }
13
+
14
+ get state(): TQRestorePoint {
15
+ return { historyIndex: this.$historyIndex, offset: this.offset };
16
+ }
17
+
18
+ constructor(private lexer: Lexer) { }
19
+
20
+ reset(buffer: string) {
21
+ this.lexer.feed(buffer);
22
+ this.history = [];
23
+ this.$historyIndex = -1;
24
+ }
25
+
26
+ restore(state: TQRestorePoint) {
27
+ if (this.history[state.historyIndex].offset != state.offset) {
28
+ return;
29
+ }
30
+ this.$historyIndex = state.historyIndex;
31
+ }
32
+
33
+ feed(buffer: string, flush?: boolean) {
34
+ this.queued += buffer;
35
+ if (flush) {
36
+ this.flush();
37
+ }
38
+ }
39
+
40
+ flush() {
41
+ this.history = this.history.slice(this.$historyIndex);
42
+ this.$historyIndex = 0;
43
+ if (this.lexer.flush) {
44
+ this.lexer.flush()
45
+ }
46
+ }
47
+
48
+ previous() {
49
+ if (this.$historyIndex > 0) {
50
+ return this.history[--this.$historyIndex];
51
+ }
52
+ }
53
+
54
+ next() {
55
+ if (this.$historyIndex + 1 >= this.history.length) {
56
+ this.lexerNext();
57
+ }
58
+ if (this.$historyIndex + 1 < this.history.length) {
59
+ return this.history[++this.$historyIndex];
60
+ }
61
+ }
62
+
63
+ peek(offset: number) {
64
+ offset += this.$historyIndex;
65
+ while ((offset >= this.history.length) && this.lexerNext()) {
66
+ // Seeking
67
+ }
68
+ if (offset >= 0 && offset < this.history.length)
69
+ return this.history[offset];
70
+ }
71
+
72
+ private lexerNext() {
73
+ let token = this.lexer.next();
74
+
75
+ if (typeof token === 'undefined' && this.queued) {
76
+ this.lexer.feed(this.queued, this.$historyIndex >= 0 ? this.lexer.state() : undefined);
77
+ this.queued = '';
78
+ token = this.lexer.next();
79
+ }
80
+ if (token)
81
+ this.history.push(token);
82
+ return token;
83
+ }
84
+
85
+ [Symbol.iterator]() {
86
+ return new TokenIterator(this)
87
+ }
88
+
89
+ }
90
+
91
+ class TokenIterator {
92
+ constructor(private buffer: TokenBuffer) { }
93
+
94
+ next() {
95
+ const token = this.buffer.next()
96
+ return { value: token, done: !token }
97
+ }
98
+
99
+ [Symbol.iterator]() {
100
+ return this
101
+ }
102
+ }
@@ -0,0 +1,74 @@
1
+ import { TokenBuffer } from "../../lexers/token-buffer";
2
+ import { GrammarRule, GrammarRuleSymbol, LanguageDefinition, LexerToken } from "../../typings";
3
+ import { Matrix } from "../../utility/general";
4
+ import { ParserUtility } from "../parser";
5
+
6
+ export function CYK(language: LanguageDefinition & { tokens: TokenBuffer }, _options = {}) {
7
+ const { grammar, tokens } = language;
8
+
9
+ const terminals: GrammarRule[] = [];
10
+ const nonTerminals: GrammarRule[] = [];
11
+
12
+ for (const name in grammar.rules) {
13
+ for (const rule of grammar.rules[name]) {
14
+ const { symbols } = rule;
15
+ if (ParserUtility.SymbolIsTerminal(symbols[0])) {
16
+ terminals.push(rule);
17
+ } else {
18
+ nonTerminals.push(rule);
19
+ }
20
+ }
21
+ }
22
+
23
+ let currentTokenIndex = -1;
24
+ const chart = new Matrix(0, 0, () => new Map<GrammarRuleSymbol, Terminal | NonTerminal>());
25
+ for (const token of tokens) {
26
+ currentTokenIndex++;
27
+ chart.resize(currentTokenIndex + 2, currentTokenIndex + 2);
28
+ for (const rule of terminals) {
29
+ if (ParserUtility.TokenMatchesSymbol(token, rule.symbols[0])) {
30
+ chart.get(currentTokenIndex, currentTokenIndex).set(rule.name, { rule, token })
31
+ }
32
+ }
33
+
34
+
35
+ for (let floor = currentTokenIndex; floor >= 0; floor--) {
36
+ for (let inner = floor; inner <= currentTokenIndex; inner++) {
37
+ const leftCell = chart.get(floor, inner);
38
+ const rightCell = chart.get(inner + 1, currentTokenIndex);
39
+
40
+ for (const rule of nonTerminals) {
41
+ const { symbols: [leftSymbol, rightSymbol] } = rule;
42
+ const left: Terminal | NonTerminal = leftCell.get(leftSymbol);
43
+ const right: Terminal | NonTerminal = rightCell.get(rightSymbol);
44
+ if (left && right) {
45
+ chart.get(floor, currentTokenIndex).set(rule.name, { rule, left, right });
46
+ }
47
+ }
48
+ }
49
+ }
50
+ }
51
+
52
+ const results = Array.from(chart.get(0, currentTokenIndex).values()).map(v => GetValue(v));
53
+ return { results };
54
+ }
55
+
56
+ function GetValue(ref: Terminal | NonTerminal) {
57
+ if (!ref)
58
+ return;
59
+ if ('token' in ref) {
60
+ return ParserUtility.PostProcess(ref.rule, [ref.token]);
61
+ }
62
+ return ParserUtility.PostProcess(ref.rule, [GetValue(ref.left), GetValue(ref.right)])
63
+ }
64
+
65
+ export interface NonTerminal {
66
+ rule: GrammarRule;
67
+ left: NonTerminal | Terminal;
68
+ right: NonTerminal | Terminal;
69
+ }
70
+
71
+ export interface Terminal {
72
+ rule: GrammarRule;
73
+ token: LexerToken;
74
+ }
@@ -0,0 +1,193 @@
1
+ import { Dictionary, GrammarRule, LanguageDefinition } from "../../typings";
2
+ import { TokenBuffer } from "../../lexers/token-buffer";
3
+ import { TextFormatter } from "../../utility/text-format";
4
+ import { ParserUtility } from "../parser";
5
+
6
+ export interface EarleyParserOptions {
7
+ keepHistory?: boolean;
8
+ }
9
+
10
+ export function Earley(language: LanguageDefinition & { tokens: TokenBuffer }, options: EarleyParserOptions = {}) {
11
+ const { tokens } = language;
12
+ const { rules, start } = language.grammar;
13
+ const column = new Column(rules, 0);
14
+ const table: Column[] = [column];
15
+ column.wants[start] = [];
16
+ column.predict(start);
17
+ column.process();
18
+
19
+ let current: number = 0;
20
+
21
+ for (const token of tokens) {
22
+ const previousColumn: Column = table[current];
23
+
24
+ if (!(options.keepHistory)) {
25
+ delete table[current - 1];
26
+ }
27
+
28
+ current++;
29
+
30
+ const nextColumn = new Column(rules, current);
31
+ table.push(nextColumn);
32
+
33
+ const literal = token.value;
34
+ const data = token;
35
+ nextColumn.data = literal;
36
+ const { scannable } = previousColumn;
37
+ for (let w = scannable.length; w--;) {
38
+ const state = scannable[w];
39
+ const symbol = state.rule.symbols[state.dot];
40
+ if (ParserUtility.TokenMatchesSymbol(token, symbol)) {
41
+ const next = state.nextState({ data, token, isToken: true, reference: current - 1 });
42
+ nextColumn.states.push(next);
43
+ }
44
+ }
45
+
46
+ nextColumn.process();
47
+
48
+ if (nextColumn.states.length === 0) {
49
+ throw TextFormatter.UnexpectedToken(tokens, previousColumn.expects());
50
+ }
51
+ }
52
+
53
+ const results = [];
54
+ const { states } = table[table.length - 1];
55
+ for (const { rule: { name, symbols }, dot, reference, data } of states) {
56
+ if (name === start && dot === symbols.length && reference == 0) {
57
+ results.push(data);
58
+ }
59
+ }
60
+ return { results, info: { table } };
61
+ }
62
+
63
+
64
+ class Column {
65
+ data: any;
66
+ states: State[] = [];
67
+ wants: Dictionary<State[]> = Object.create(null);// states indexed by the non-terminal they expect
68
+ scannable: State[] = [];// list of states that expect a token
69
+ completed: Dictionary<State[]> = Object.create(null); // states that are nullable
70
+
71
+ constructor(
72
+ private rules: Dictionary<GrammarRule[]>,
73
+ public index: number
74
+ ) { }
75
+
76
+
77
+ process() {
78
+ let w = 0;
79
+ let state: State;
80
+
81
+ // eslint-disable-next-line no-cond-assign
82
+ while (state = this.states[w++]) { // nb. we push() during iteration
83
+ if (state.isComplete) {
84
+ state.finish();
85
+ const { wantedBy } = state;
86
+ for (let i = wantedBy.length; i--;) { // this line is hot
87
+ this.complete(wantedBy[i], state);
88
+ }
89
+
90
+ // special-case nullables
91
+ if (state.reference === this.index) {
92
+ const { name } = state.rule;
93
+ this.completed[name] = this.completed[name] || [];
94
+ this.completed[name].push(state);
95
+ }
96
+ } else {
97
+ const exp = state.rule.symbols[state.dot];
98
+ if (typeof exp !== 'string') {
99
+ this.scannable.push(state);
100
+ continue;
101
+ }
102
+
103
+ // predict
104
+ if (this.wants[exp]) {
105
+ this.wants[exp].push(state);
106
+
107
+ if (this.completed[exp]) {
108
+ for (const right of this.completed[exp]) {
109
+ this.complete(state, right);
110
+ }
111
+ }
112
+ } else {
113
+ this.wants[exp] = [state];
114
+ this.predict(exp);
115
+ }
116
+ }
117
+ }
118
+ }
119
+
120
+ predict(exp: string) {
121
+ if (!this.rules[exp])
122
+ return;
123
+
124
+ for (const rule of this.rules[exp]) {
125
+ this.states.push(new State(rule, 0, this.index, this.wants[exp]));
126
+ }
127
+ }
128
+
129
+ expects(): GrammarRule[] {
130
+ return this.states
131
+ .filter((state) => {
132
+ const nextSymbol = state.rule.symbols[state.dot];
133
+ return nextSymbol && typeof nextSymbol !== "string";
134
+ })
135
+ .map(v => ({ ...v.rule, index: v.dot }));
136
+ }
137
+
138
+ private complete(left: State, right: State) {
139
+ const copy = left.nextState(right);
140
+ this.states.push(copy);
141
+ }
142
+
143
+
144
+ }
145
+
146
+ class State {
147
+ isComplete: boolean;
148
+ data: any = [];
149
+ left: State;
150
+ right: State | StateToken;
151
+ constructor(
152
+ public rule: GrammarRule,
153
+ public dot: number,
154
+ public reference: number,
155
+ public wantedBy: State[]
156
+ ) {
157
+ this.isComplete = this.dot === rule.symbols.length;
158
+ }
159
+
160
+ nextState(child: State | StateToken) {
161
+ const state = new State(this.rule, this.dot + 1, this.reference, this.wantedBy);
162
+ state.left = this;
163
+ state.right = child;
164
+ if (state.isComplete) {
165
+ state.data = state.build();
166
+ state.right = undefined;
167
+ }
168
+ return state;
169
+ }
170
+
171
+
172
+ finish() {
173
+ this.data = ParserUtility.PostProcess(this.rule, this.data, { reference: this.reference, dot: this.dot });
174
+ }
175
+
176
+ protected build() {
177
+ const children = [];
178
+ // eslint-disable-next-line @typescript-eslint/no-this-alias
179
+ let node: State = this;
180
+ do {
181
+ children[node.dot - 1] = node.right.data;
182
+ node = node.left;
183
+ } while (node.left);
184
+ return children;
185
+ }
186
+ }
187
+
188
+ interface StateToken {
189
+ data: any,
190
+ token: any,
191
+ isToken: boolean,
192
+ reference: number
193
+ }