grammar-well 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.eslintrc.cjs +14 -0
  2. package/README.md +288 -0
  3. package/bootstrap.ts +35 -0
  4. package/build/compiler/compiler.d.ts +48 -0
  5. package/build/compiler/compiler.js +227 -0
  6. package/build/compiler/compiler.js.map +1 -0
  7. package/build/compiler/generator.d.ts +23 -0
  8. package/build/compiler/generator.js +213 -0
  9. package/build/compiler/generator.js.map +1 -0
  10. package/build/compiler/import-resolver.d.ts +15 -0
  11. package/build/compiler/import-resolver.js +37 -0
  12. package/build/compiler/import-resolver.js.map +1 -0
  13. package/build/compiler/outputs/javascript.d.ts +3 -0
  14. package/build/compiler/outputs/javascript.js +29 -0
  15. package/build/compiler/outputs/javascript.js.map +1 -0
  16. package/build/compiler/outputs/json.d.ts +2 -0
  17. package/build/compiler/outputs/json.js +8 -0
  18. package/build/compiler/outputs/json.js.map +1 -0
  19. package/build/compiler/outputs/typescript.d.ts +2 -0
  20. package/build/compiler/outputs/typescript.js +108 -0
  21. package/build/compiler/outputs/typescript.js.map +1 -0
  22. package/build/grammars/gwell.d.ts +997 -0
  23. package/build/grammars/gwell.js +537 -0
  24. package/build/grammars/gwell.js.map +1 -0
  25. package/build/grammars/json.d.ts +151 -0
  26. package/build/grammars/json.js +112 -0
  27. package/build/grammars/json.js.map +1 -0
  28. package/build/grammars/number.d.ts +239 -0
  29. package/build/grammars/number.js +115 -0
  30. package/build/grammars/number.js.map +1 -0
  31. package/build/grammars/number.json +1 -0
  32. package/build/grammars/string.d.ts +116 -0
  33. package/build/grammars/string.js +50 -0
  34. package/build/grammars/string.js.map +1 -0
  35. package/build/grammars/string.json +1 -0
  36. package/build/grammars/whitespace.d.ts +51 -0
  37. package/build/grammars/whitespace.js +30 -0
  38. package/build/grammars/whitespace.js.map +1 -0
  39. package/build/grammars/whitespace.json +1 -0
  40. package/build/index.d.ts +4 -0
  41. package/build/index.js +21 -0
  42. package/build/index.js.map +1 -0
  43. package/build/lexers/character-lexer.d.ts +27 -0
  44. package/build/lexers/character-lexer.js +71 -0
  45. package/build/lexers/character-lexer.js.map +1 -0
  46. package/build/lexers/stateful-lexer.d.ts +48 -0
  47. package/build/lexers/stateful-lexer.js +309 -0
  48. package/build/lexers/stateful-lexer.js.map +1 -0
  49. package/build/lexers/token-buffer.d.ts +32 -0
  50. package/build/lexers/token-buffer.js +92 -0
  51. package/build/lexers/token-buffer.js.map +1 -0
  52. package/build/parser/algorithms/cyk.d.ts +16 -0
  53. package/build/parser/algorithms/cyk.js +58 -0
  54. package/build/parser/algorithms/cyk.js.map +1 -0
  55. package/build/parser/algorithms/earley.d.ts +48 -0
  56. package/build/parser/algorithms/earley.js +158 -0
  57. package/build/parser/algorithms/earley.js.map +1 -0
  58. package/build/parser/algorithms/lr.d.ts +10 -0
  59. package/build/parser/algorithms/lr.js +34 -0
  60. package/build/parser/algorithms/lr.js.map +1 -0
  61. package/build/parser/parser.d.ts +26 -0
  62. package/build/parser/parser.js +74 -0
  63. package/build/parser/parser.js.map +1 -0
  64. package/build/typings.d.ts +198 -0
  65. package/build/typings.js +3 -0
  66. package/build/typings.js.map +1 -0
  67. package/build/utility/general.d.ts +46 -0
  68. package/build/utility/general.js +112 -0
  69. package/build/utility/general.js.map +1 -0
  70. package/build/utility/lint.d.ts +2 -0
  71. package/build/utility/lint.js +28 -0
  72. package/build/utility/lint.js.map +1 -0
  73. package/build/utility/lr.d.ts +56 -0
  74. package/build/utility/lr.js +131 -0
  75. package/build/utility/lr.js.map +1 -0
  76. package/build/utility/text-format.d.ts +11 -0
  77. package/build/utility/text-format.js +84 -0
  78. package/build/utility/text-format.js.map +1 -0
  79. package/licenses/LICENSE.txt +165 -0
  80. package/licenses/moo.license +29 -0
  81. package/licenses/nearley.license +21 -0
  82. package/package.json +52 -0
  83. package/src/compiler/compiler.ts +239 -0
  84. package/src/compiler/generator.ts +229 -0
  85. package/src/compiler/import-resolver.ts +36 -0
  86. package/src/compiler/outputs/javascript.ts +27 -0
  87. package/src/compiler/outputs/json.ts +5 -0
  88. package/src/compiler/outputs/typescript.ts +105 -0
  89. package/src/grammars/gwell.gwell +278 -0
  90. package/src/grammars/gwell.js +539 -0
  91. package/src/grammars/gwell.json +1 -0
  92. package/src/grammars/json.gwell +75 -0
  93. package/src/grammars/json.js +121 -0
  94. package/src/grammars/json.json +1 -0
  95. package/src/grammars/number.gwell +20 -0
  96. package/src/grammars/number.js +117 -0
  97. package/src/grammars/number.json +1 -0
  98. package/src/grammars/string.gwell +15 -0
  99. package/src/grammars/string.js +52 -0
  100. package/src/grammars/string.json +1 -0
  101. package/src/grammars/whitespace.gwell +6 -0
  102. package/src/grammars/whitespace.js +32 -0
  103. package/src/grammars/whitespace.json +1 -0
  104. package/src/index.ts +4 -0
  105. package/src/lexers/character-lexer.ts +73 -0
  106. package/src/lexers/stateful-lexer.ts +335 -0
  107. package/src/lexers/token-buffer.ts +102 -0
  108. package/src/parser/algorithms/cyk.ts +74 -0
  109. package/src/parser/algorithms/earley.ts +193 -0
  110. package/src/parser/algorithms/lr.ts +37 -0
  111. package/src/parser/parser.ts +77 -0
  112. package/src/typings.ts +221 -0
  113. package/src/utility/general.ts +120 -0
  114. package/src/utility/lint.ts +26 -0
  115. package/src/utility/lr.ts +153 -0
  116. package/src/utility/text-format.ts +84 -0
  117. package/testing.ts +18 -0
package/src/parser/algorithms/lr.ts ADDED
@@ -0,0 +1,37 @@
+ import { TokenBuffer } from "../../lexers/token-buffer";
+ import { LanguageDefinition } from "../../typings";
+ import { CanonicalCollection, LRStack } from "../../utility/lr";
+ import { ParserUtility } from "../parser";
+
+ export function LR(language: LanguageDefinition & { tokens: TokenBuffer, canonical?: CanonicalCollection }, _options = {}) {
+     const { grammar, tokens, canonical } = language;
+     const collection = canonical || new CanonicalCollection(grammar);
+     const stack = new LRStack();
+     const s = collection.states['0.0'];
+     stack.add(null);
+     stack.shift(s);
+     let token;
+
+     // eslint-disable-next-line no-cond-assign
+     while (token = tokens.next()) {
+         for (const [symbol, state] of stack.current.state.actions) {
+             if (ParserUtility.TokenMatchesSymbol(token, symbol)) {
+                 stack.add(symbol);
+                 stack.shift(collection.states[state]);
+                 stack.current.value = token;
+                 break;
+             }
+         }
+
+         while (stack.current.state?.isFinal) {
+             const rule = stack.current.state.reduce;
+             stack.reduce(rule);
+             stack.current.value = ParserUtility.PostProcess(rule, stack.current.children.map(v => v.value));
+             const s = stack.previous.state.goto.get(rule.name);
+             stack.shift(collection.states[s]);
+
+         }
+     }
+
+     return { results: [stack.current.value], canonical: collection }
+ }
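The driver above shifts when the incoming token matches one of the current state's action symbols and keeps reducing while the state is final; the canonical collection it builds is also returned, so later parses can skip table construction. A minimal reuse sketch (the names `language`, `input`, and `otherInput` are placeholders, the import paths assume a file placed next to the package's src tree, and the extra `canonical` property is passed through untyped):

import { LanguageDefinition } from "./typings";
import { Parser } from "./parser/parser";

declare const language: LanguageDefinition;   // placeholder: a compiled language definition
declare const input: string, otherInput: string;

// First run with the LR algorithm; the returned object also carries the canonical collection.
const first = new Parser(language, { algorithm: 'lr' }).run(input) as any;
// Attach that collection to the language object so the next run reuses it instead of rebuilding the table.
const warm = new Parser({ ...language, canonical: first.canonical } as any, { algorithm: 'lr' });
const second = warm.run(otherInput);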
package/src/parser/parser.ts ADDED
@@ -0,0 +1,77 @@
+ import { CharacterLexer } from "../lexers/character-lexer";
+ import { StatefulLexer } from "../lexers/stateful-lexer";
+ import { TokenBuffer } from "../lexers/token-buffer";
+ import { GrammarRule, GrammarRuleSymbol, LanguageDefinition, LexerToken, ParserAlgorithm } from "../typings";
+ import { CYK } from "./algorithms/cyk";
+ import { Earley } from "./algorithms/earley";
+ import { LR } from "./algorithms/lr";
+
+ const ParserRegistry: { [key: string]: ParserAlgorithm } = {
+     earley: Earley,
+     cyk: CYK,
+     lr: LR
+ }
+
+ export function Parse(language: LanguageDefinition, input: string, options?: ParserOptions) {
+     const i = new Parser(language, options);
+     return i.run(input);
+ }
+
+ export class Parser {
+
+     constructor(private language: LanguageDefinition, private options: ParserOptions = { algorithm: 'earley', parserOptions: {} }) { }
+
+     run(input: string): { results: any[] } {
+         const tokenQueue = this.getTokenQueue();
+         tokenQueue.feed(input);
+         if (typeof this.options.algorithm == 'function')
+             return this.options.algorithm({ ...this.language, tokens: tokenQueue, utility: ParserUtility }, this.options.parserOptions);
+         return ParserRegistry[this.options.algorithm]({ ...this.language, tokens: tokenQueue, utility: ParserUtility }, this.options.parserOptions);
+     }
+
+     private getTokenQueue() {
+         const { lexer } = this.language;
+         if (!lexer) {
+             return new TokenBuffer(new CharacterLexer());
+         } else if ("feed" in lexer && typeof lexer.feed == 'function') {
+             return new TokenBuffer(lexer);
+         } else if ('states' in lexer) {
+             return new TokenBuffer(new StatefulLexer(lexer));
+         }
+     }
+ }
+
+
+ export class ParserUtility {
+
+     static TokenMatchesSymbol(token: LexerToken, symbol: GrammarRuleSymbol) {
+         if (typeof symbol === 'string')
+             throw 'Attempted to match token against non-terminal';
+         if (typeof symbol == 'function')
+             return symbol(token);
+         if (!symbol)
+             return
+         if ("test" in symbol)
+             return symbol.test(token.value);
+         if ("token" in symbol)
+             return symbol.token === token.type || token.tag?.has(symbol.token);
+         if ("literal" in symbol)
+             return symbol.literal === token.value;
+     }
+
+     static SymbolIsTerminal<T extends GrammarRuleSymbol>(symbol: T) {
+         return typeof symbol != 'string';
+     }
+
+     static PostProcess(rule: GrammarRule, data: any, meta?: any) {
+         if (rule.postprocess) {
+             return rule.postprocess({ rule, data, meta });
+         }
+         return data;
+     }
+ }
+
+ interface ParserOptions {
+     algorithm: (keyof typeof ParserRegistry) | ParserAlgorithm,
+     parserOptions?: any;
+ }
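A minimal usage sketch of this entry point (the `language` value, the sample input, and the import paths are placeholders; the custom algorithm only demonstrates the ParserAlgorithm shape by draining the token buffer):

import { LanguageDefinition, ParserAlgorithm } from "./typings";
import { Parse, Parser } from "./parser/parser";

declare const language: LanguageDefinition;   // placeholder for a compiled language definition

// Default options run the Earley algorithm.
const { results } = Parse(language, "sample input");

// Any registered algorithm can be selected by name.
const lrRun = new Parser(language, { algorithm: 'lr' }).run("sample input");

// A custom ParserAlgorithm can be supplied instead of a registry key.
const drain: ParserAlgorithm = ({ tokens }) => {
    const values: any[] = [];
    let token;
    // eslint-disable-next-line no-cond-assign
    while (token = tokens.next()) values.push(token.value);
    return { results: values };
};
const tokenValues = Parse(language, "sample input", { algorithm: drain }).results;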
package/src/typings.ts ADDED
@@ -0,0 +1,221 @@
+ import { TokenBuffer } from "./lexers/token-buffer";
+ import { ParserUtility } from "./parser/parser";
+
+ export interface Dictionary<T> {
+     [key: string]: T;
+ }
+
+ export interface CompileOptions {
+     version?: string;
+     noscript?: boolean;
+     basedir?: string;
+     resolver?: ImportResolverConstructor;
+     resolverInstance?: ImportResolver;
+     exportName?: string;
+     format?: OutputFormat;
+ }
+
+ export type OutputFormat = '_default' | 'object' | 'json' | 'js' | 'javascript' | 'module' | 'esmodule' | 'ts' | 'typescript'
+
+ export interface GrammarBuilderContext {
+     alreadyCompiled: Set<string>;
+     resolver: ImportResolver;
+     uuids: Dictionary<number>;
+ }
+
+
+ export interface ImportResolver {
+     path(path: string): string;
+     body(path: string): Promise<string>;
+ }
+
+ export interface ImportResolverConstructor {
+     new(basePath: string): ImportResolver;
+ }
+
+ export type PostProcessor = (payload: PostProcessorPayload) => any;
+
+ interface PostProcessorPayload {
+     data: any[];
+     rule: GrammarRule;
+     meta: any;
+ }
+
+ export type JavascriptDirective = { body: GrammarTypeJS; } | { head: GrammarTypeJS }
+
+ export interface ImportDirective {
+     import: string;
+     path?: boolean;
+ }
+
+ export interface ConfigDirective {
+     config: Dictionary<any>;
+ }
+
+ export interface GrammarDirective {
+     grammar: {
+         config?: Dictionary<any>;
+         rules: GrammarBuilderRule[];
+     }
+ }
+
+ export interface LexerDirective {
+     lexer: {
+         start?: string,
+         states: LexerStateDefinition[];
+     };
+ }
+
+ export interface GrammarBuilderRule {
+     name: string;
+     expressions: GrammarBuilderExpression[];
+     postprocess?: GrammarTypeJS | GrammarTypeBuiltIn | GrammarTypeTemplate;
+ }
+
+ export interface GrammarBuilderExpression {
+     symbols: GrammarBuilderSymbol[];
+     postprocess?: GrammarTypeJS | GrammarTypeBuiltIn | GrammarTypeTemplate;
+ }
+
+ export type GrammarBuilderSymbol = GrammarTypeRule | GrammarTypeRegex | GrammarTypeToken | GrammarTypeLiteral | GrammarBuilderSymbolRepeat | GrammarBuilderSymbolSubexpression;
+
+ export interface GrammarBuilderSymbolSubexpression {
+     subexpression: GrammarBuilderExpression[];
+ }
+
+ export interface GrammarBuilderSymbolRepeat {
+     expression: GrammarBuilderSymbol;
+     repeat: "+" | "*" | "?";
+ }
+
+ export interface GrammarTypeRule {
+     rule: string;
+ }
+
+ export interface GrammarTypeRegex {
+     regex: string;
+     flags?: string
+ }
+
+ export interface GrammarTypeToken {
+     token: string;
+ }
+
+ export interface GrammarTypeLiteral {
+     literal: string;
+     insensitive?: boolean;
+ }
+
+ export type GrammarTypeBuiltIn = { builtin: string };
+ export type GrammarTypeTemplate = { template: string };
+ export type GrammarTypeJS = { js: string };
+
+
+ export type ParserAlgorithm = ((language: LanguageDefinition & { tokens: TokenBuffer; utility: ParserUtility }, options?: any) => { results: any[], info?: any });
+
+ export type LanguageDirective = (JavascriptDirective | ImportDirective | ConfigDirective | GrammarDirective | LexerDirective);
+
+ type GrammarRuleSymbolFunction = (data: LexerToken) => boolean;
+
+ export interface GrammarRule {
+     name: string;
+     symbols: GrammarRuleSymbol[];
+     postprocess?: PostProcessor;
+ }
+
+ export type GrammarRuleSymbol = string | RegExp | GrammarTypeLiteral | GrammarTypeToken | GrammarRuleSymbolFunction;
+
+ export interface GeneratorGrammarRule {
+     name: string;
+     symbols: GeneratorGrammarSymbol[];
+     postprocess?: GrammarTypeTemplate | GrammarTypeBuiltIn | GrammarTypeJS;
+ }
+
+ export type GeneratorGrammarSymbol = { alias?: string } & (GrammarTypeRule | GrammarTypeRegex | GrammarTypeLiteral | GrammarTypeToken | GrammarTypeJS);
+
+ export interface LanguageDefinition {
+     lexer?: Lexer | LexerConfig;
+     grammar: {
+         start: string;
+         rules: Dictionary<GrammarRule[]>;
+     }
+ }
+
+ export interface TQRestorePoint {
+     historyIndex: number;
+     offset: number;
+ }
+
+ export interface Lexer {
+     next(): LexerToken | undefined;
+     feed(chunk?: string, state?: ReturnType<Lexer['state']>): void;
+     state(): any;
+     flush?(): void;
+ }
+
+ export interface LexerToken {
+     type?: string | undefined;
+     tag?: Set<string>;
+     value: string;
+     offset: number;
+     line: number;
+     column: number;
+ }
+
+ export interface LexerStatus {
+     index: number;
+     line: number;
+     column: number;
+     state: string;
+ }
+
+ export interface LexerStateDefinition {
+     name: string;
+     unmatched?: string;
+     default?: string;
+     rules: (LexerStateImportRule | LexerStateMatchRule)[];
+ }
+
+ export interface LexerStateImportRule {
+     import: string[]
+ }
+
+ export interface LexerStateMatchRule {
+     when: string | RegExp
+     type?: string;
+     tag?: string[];
+     pop?: number | 'all';
+     inset?: number;
+     goto?: string;
+     set?: string;
+ }
+
+ export interface ResolvedStateDefinition {
+     name: string;
+     unmatched?: string;
+     rules: LexerStateMatchRule[];
+ }
+
+ export interface CompiledStateDefinition {
+     regexp: RegExp;
+     unmatched?: LexerStateMatchRule;
+     rules: LexerStateMatchRule[];
+ }
+
+ export interface LexerConfig {
+     start?: string
+     states: Dictionary<LexerStateDefinition>;
+ }
+
+ export interface GeneratorState {
+     version: string;
+     config: Dictionary<string>;
+     head: string[];
+     body: string[];
+     lexer?: LexerConfig;
+     grammar: {
+         start: string;
+         rules: Dictionary<GeneratorGrammarRule[]>,
+         uuids: { [key: string]: number }
+     }
+ }
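Taken together, LanguageDefinition, GrammarRule, and LexerConfig describe everything the parser consumes, so a small language can be written by hand rather than compiled from a .gwell source. A minimal hand-written sketch, built only from the interfaces above (the rule and token names are illustrative and the definition has not been checked against the compiler's own output):

import { LanguageDefinition } from "./typings";

// A tiny additive-expression language: number ('+' number)*
const addition: LanguageDefinition = {
    lexer: {
        start: 'main',
        states: {
            main: {
                name: 'main',
                rules: [
                    { when: /[0-9]+/, type: 'number' },
                    { when: '+', type: 'plus' }
                ]
            }
        }
    },
    grammar: {
        start: 'sum',
        rules: {
            sum: [
                { name: 'sum', symbols: [{ token: 'number' }], postprocess: ({ data }) => Number(data[0].value) },
                { name: 'sum', symbols: ['sum', { literal: '+' }, { token: 'number' }], postprocess: ({ data }) => data[0] + Number(data[2].value) }
            ]
        }
    }
};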
package/src/utility/general.ts ADDED
@@ -0,0 +1,120 @@
+ import { Dictionary, GrammarRuleSymbol } from "../typings";
+
+
+ export class Collection<T> {
+     categorized: Dictionary<Dictionary<number>> = {};
+     private uncategorized = new Map<T, number>();
+     private items: T[] = [];
+
+     constructor(ref: T[] = []) {
+         for (const s of ref) {
+             this.encode(s);
+         }
+     }
+
+     encode(ref: T): number {
+         const c = this.resolve(ref);
+         if (c)
+             return this.addCategorized(c.category, c.key, ref);
+         return this.addUncategorized(ref);
+     }
+
+     decode(id: number | string): T {
+         return this.items[typeof id == 'string' ? parseInt(id) : id];
+     }
+
+     // eslint-disable-next-line @typescript-eslint/no-unused-vars, @typescript-eslint/no-empty-function
+     resolve(_: T): { category: keyof Collection<T>['categorized'], key: string } | void { }
+
+     private addCategorized(category: keyof Collection<T>['categorized'], key: string, ref: T): number {
+         if (!(key in this.categorized[category])) {
+             this.categorized[category][key] = this.items.length;
+             this.items.push(ref);
+         }
+         return this.categorized[category][key];
+     }
+
+     private addUncategorized(ref: T): number {
+         if (!this.uncategorized.has(ref)) {
+             this.uncategorized.set(ref, this.items.length);
+             this.items.push(ref);
+         }
+         return this.uncategorized.get(ref);
+     }
+
+ }
+
+ export class SymbolCollection extends Collection<GrammarRuleSymbol>{
+     categorized = {
+         nonTerminal: {},
+         literalI: {},
+         literalS: {},
+         token: {},
+         regex: {},
+         function: {},
+     }
+
+     resolve(symbol: GrammarRuleSymbol) {
+         if (typeof symbol == 'string') {
+             return { category: 'nonTerminal', key: symbol };
+         } else if ('literal' in symbol) {
+             if (symbol.insensitive)
+                 return { category: 'literalI', key: symbol.literal }
+             return { category: 'literalS', key: symbol.literal }
+         } else if ('token' in symbol) {
+             return { category: 'token', key: symbol.token }
+         } else if ('test' in symbol) {
+             return { category: 'regex', key: symbol.toString() }
+         } else if (typeof symbol == 'function') {
+             return { category: 'function', key: symbol.toString() }
+         }
+     }
+ }
+
+ export class Matrix<T> {
+     private $x = 0;
+     private $y = 0;
+     get x() { return this.$x }
+     set x(x: number) { x != this.$x && this.resize(x, this.y); }
+     get y() { return this.$y }
+     set y(y: number) { y != this.$y && this.resize(this.x, y); }
+
+     matrix: GetCallbackOrValue<T>[][] = [];
+
+     constructor(x: number, y: number, private initial?: T | ((...args: any) => T)) {
+         this.resize(x, y);
+     }
+
+     get(x: number, y: number): T {
+         return this.matrix[x][y];
+     }
+
+     set(x: number, y: number, value: any) {
+         return this.matrix[x][y] = value;
+     }
+
+     resize(x: number, y: number) {
+         if (x < this.x) {
+             this.matrix.splice(x);
+             this.$x = x;
+         }
+         if (y > this.y) {
+             this.matrix.forEach(a => a.push(...Matrix.Array(y - a.length, this.initial)));
+             this.$y = y;
+         } else if (y < this.y) {
+             this.matrix.forEach(a => a.splice(y + 1));
+             this.$y = y;
+         }
+         if (x > this.x) {
+             const ext = Matrix.Array(x - this.x, () => Matrix.Array(this.y, this.initial))
+             this.matrix.push(...ext);
+             this.$x = x;
+         }
+     }
+
+     static Array<T>(length, initial?: T | ((...args: any) => T)): GetCallbackOrValue<T>[] {
+         return Array.from({ length }, (typeof initial == 'function' ? initial : () => initial) as any);
+     }
+ }
+
+ type GetCallbackOrValue<T> = T extends (...args: any) => any ? ReturnType<T> : T;
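A short behavioural sketch of the two helpers above (values are illustrative): SymbolCollection assigns structurally equal symbols the same numeric id, and Matrix resizes in place, filling new cells with its initial value.

import { SymbolCollection, Matrix } from "./utility/general";

const symbols = new SymbolCollection();
const a = symbols.encode({ literal: '+' });   // first occurrence stores the symbol and returns its id
const b = symbols.encode({ literal: '+' });   // same category and key, so b === a
const plus = symbols.decode(a);               // returns the reference stored for that id

const grid = new Matrix<number>(2, 3, 0);     // 2 x 3 grid, every cell initialised to 0
grid.set(1, 2, 42);
grid.x = 4;                                   // growing x appends rows filled with the initial value
grid.get(1, 2);                               // 42
grid.get(3, 0);                               // 0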
package/src/utility/lint.ts ADDED
@@ -0,0 +1,26 @@
+ import { LanguageDefinition, GrammarRuleSymbol, Dictionary, GrammarRule } from "../typings";
+
+
+ export function LintGrammarSymbols(language: LanguageDefinition): GrammarRuleSymbol[] {
+     const unused = new Set<string>();
+     const { rules, start } = language.grammar;
+     for (const rule in rules) {
+         unused.add(rule);
+     }
+     TraverseRule(start, rules, unused);
+     return Array.from(unused);
+ }
+ function TraverseRule(name: string, rules: Dictionary<GrammarRule[]>, unvisited: Set<string>) {
+     if (!unvisited.has(name)) {
+         return;
+     }
+     unvisited.add(name);
+     const n = rules[name];
+     for (const { symbols } of n) {
+         for (const symbol of symbols) {
+             if (typeof symbol == 'string') {
+                 TraverseRule(symbol, rules, unvisited);
+             }
+         }
+     }
+ }
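A minimal call sketch (the `language` value is a placeholder for a compiled LanguageDefinition); the function walks the grammar from its start rule and returns the rule names it flags as unused:

import { LanguageDefinition } from "./typings";
import { LintGrammarSymbols } from "./utility/lint";

declare const language: LanguageDefinition;   // placeholder
const unusedRules = LintGrammarSymbols(language);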
package/src/utility/lr.ts ADDED
@@ -0,0 +1,153 @@
+ import { ParserUtility } from "../parser/parser";
+ import { Dictionary, GrammarRule, GrammarRuleSymbol, LanguageDefinition } from "../typings";
+ import { Collection, SymbolCollection } from "./general";
+
+ export class CanonicalCollection {
+     rules: Collection<GrammarRule> = new Collection();
+     states: { [key: string]: State } = Object.create(null)
+     symbols: SymbolCollection;
+
+     constructor(public grammar: LanguageGrammar & { symbols?: SymbolCollection }) {
+         this.symbols = grammar.symbols || new SymbolCollection();
+         const augmented = { name: Symbol() as unknown as string, symbols: [grammar.start] }
+         grammar.rules[augmented.name] = [augmented];
+         this.addState([{ rule: augmented, dot: 0 }]);
+     }
+
+     addState(seed: StateItem[]) {
+         const id = this.encodeStateItems(seed);
+         if (id in this.states)
+             return this.states[id];
+
+         const state = new State(this, seed);
+         this.states[id] = state;
+         for (const q in state.queue) {
+             this.addState(state.queue[q])
+         }
+         state.queue = {};
+     }
+
+     encodeRule(rule: GrammarRule, dot: number) {
+         return this.rules.encode(rule) + '.' + dot;
+     }
+
+     encodeStateItems(seed: StateItem[]) {
+         return Array.from(new Set(seed)).map(v => this.encodeRule(v.rule, v.dot)).sort().join()
+     }
+ }
+
+ class State {
+     isFinal = false;
+
+     outputs: StateOut = {
+         goto: {},
+         action: {}
+     };
+
+     queue: { [key: string]: StateItem[] } = {};
+     actions: Map<GrammarRuleSymbol, string> = new Map();
+     goto: Map<GrammarRuleSymbol, string> = new Map();
+     reduce?: GrammarRule;
+
+     constructor(private collection: CanonicalCollection, items: StateItem[]) {
+         const visited = new Set<GrammarRuleSymbol>();
+         for (const item of items) {
+             this.closure(item.rule, item.dot, visited);
+         }
+         if (this.isFinal) {
+             if (items.length == 1 && visited.size < 1) {
+                 this.reduce = items[0].rule;
+             } else {
+                 throw 'Conflict Detected';
+             }
+         }
+
+         for (const k in this.outputs.goto) {
+             const seed = this.outputs.goto[k];
+             const stateId = this.collection.encodeStateItems(seed);
+             this.queue[stateId] = seed;
+             this.goto.set(this.collection.symbols.decode(k), stateId);
+         }
+
+         for (const k in this.outputs.action) {
+             const seed = this.outputs.action[k];
+             const stateId = this.collection.encodeStateItems(seed);
+             this.queue[stateId] = seed;
+             this.actions.set(this.collection.symbols.decode(k), stateId);
+         }
+     }
+
+     private closure(rule: GrammarRule, dot: number, visited: Set<GrammarRuleSymbol>) {
+         const isFinal = rule.symbols.length == dot;
+         this.isFinal = isFinal || this.isFinal;
+         const { [dot]: symbol } = rule.symbols;
+
+         if (isFinal || visited.has(symbol))
+             return;
+
+         visited.add(symbol);
+         const stateItem = { rule, dot: dot + 1 };
+
+         if (ParserUtility.SymbolIsTerminal(symbol)) {
+             const id = this.collection.symbols.encode(symbol);
+             this.outputs.action[id] = this.outputs.action[id] || [];
+             this.outputs.action[id].push(stateItem);
+         } else {
+             const id = this.collection.symbols.encode(symbol);
+             this.outputs.goto[id] = this.outputs.goto[id] || [];
+             this.outputs.goto[id].push(stateItem);
+             for (const rule of this.collection.grammar.rules[symbol as string]) {
+                 this.closure(rule, 0, visited)
+             }
+         }
+     }
+ }
+
+ export class LRStack {
+
+     stack: LRStackItem[] = [];
+
+     get current() {
+         return this.stack[this.stack.length - 1];
+     }
+
+     get previous() {
+         return this.stack[this.stack.length - 2];
+     }
+
+     shift(state: State) {
+         this.current.state = state;
+     }
+
+     reduce(rule: GrammarRule) {
+         const n = new LRStackItem();
+         const l = rule.symbols.length;
+         n.children = this.stack.splice(l * -1, l);
+         n.children.forEach(v => delete v.state);
+         n.rule = rule;
+         n.symbol = rule.name;
+         this.stack.push(n);
+     }
+
+     add(symbol: GrammarRuleSymbol) {
+         this.stack.push(new LRStackItem())
+         this.current.symbol = symbol;
+     }
+ }
+
+
+ class LRStackItem {
+     children: LRStackItem[] = [];
+     state: State;
+     symbol: GrammarRuleSymbol;
+     rule: GrammarRule;
+     value: any;
+ }
+
+ type LanguageGrammar = LanguageDefinition['grammar'];
+ type StateItem = { rule: GrammarRule, dot: number };
+
+ interface StateOut {
+     action: Dictionary<StateItem[]>;
+     goto: Dictionary<StateItem[]>;
+ }
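The table can also be built ahead of time and cached or inspected; a minimal sketch (the `language` value is a placeholder, and the '0.0' key for the start state mirrors how the LR driver in src/parser/algorithms/lr.ts looks it up):

import { LanguageDefinition } from "./typings";
import { CanonicalCollection } from "./utility/lr";

declare const language: LanguageDefinition;   // placeholder

// Building the collection expands every reachable state up front; construction throws
// 'Conflict Detected' when a final state is not a single-item reduction.
const collection = new CanonicalCollection(language.grammar);
const startState = collection.states['0.0'];
const stateCount = Object.keys(collection.states).length;
const startActions = startState.actions.size;   // terminal symbols the start state can shift on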