grammar-well 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bootstrap.ts +14 -7
- package/build/compiler/builtin.json +1 -0
- package/build/compiler/compiler.d.ts +2 -1
- package/build/compiler/compiler.js +43 -24
- package/build/compiler/compiler.js.map +1 -1
- package/build/compiler/gwell.d.ts +1050 -0
- package/build/compiler/gwell.js +555 -0
- package/build/compiler/gwell.js.map +1 -0
- package/build/index.d.ts +1 -0
- package/build/index.js +1 -0
- package/build/index.js.map +1 -1
- package/build/parser/algorithms/cyk.js +1 -1
- package/build/parser/algorithms/cyk.js.map +1 -1
- package/build/parser/algorithms/earley.js +10 -8
- package/build/parser/algorithms/earley.js.map +1 -1
- package/build/parser/algorithms/lr.js +47 -0
- package/build/parser/algorithms/lr.js.map +5 -1
- package/build/parser/algorithms/lr0.d.ts +7 -0
- package/build/parser/algorithms/lr0.js +156 -0
- package/build/parser/algorithms/lr0.js.map +1 -0
- package/build/parser/algorithms/lrk/algorithm.d.ts +7 -0
- package/build/parser/algorithms/lrk/algorithm.js +35 -0
- package/build/parser/algorithms/lrk/algorithm.js.map +1 -0
- package/build/parser/algorithms/lrk/bimap.d.ts +6 -0
- package/build/parser/algorithms/lrk/bimap.js +19 -0
- package/build/parser/algorithms/lrk/bimap.js.map +1 -0
- package/build/parser/algorithms/lrk/canonical-collection.d.ts +14 -0
- package/build/parser/algorithms/lrk/canonical-collection.js +73 -0
- package/build/parser/algorithms/lrk/canonical-collection.js.map +1 -0
- package/build/parser/algorithms/lrk/closure.d.ts +10 -0
- package/build/parser/algorithms/lrk/closure.js +30 -0
- package/build/parser/algorithms/lrk/closure.js.map +1 -0
- package/build/parser/algorithms/lrk/stack.d.ts +19 -0
- package/build/parser/algorithms/lrk/stack.js +39 -0
- package/build/parser/algorithms/lrk/stack.js.map +1 -0
- package/build/parser/algorithms/lrk/state.d.ts +12 -0
- package/build/parser/algorithms/lrk/state.js +3 -0
- package/build/parser/algorithms/lrk/state.js.map +1 -0
- package/build/parser/parser.d.ts +3 -3
- package/build/parser/parser.js +3 -3
- package/build/parser/parser.js.map +1 -1
- package/build/typings.d.ts +1 -0
- package/build/utility/monarch.d.ts +5 -0
- package/build/utility/monarch.js +42 -0
- package/build/utility/monarch.js.map +1 -0
- package/package.json +1 -1
- package/src/compiler/builtin/json.gwell +74 -0
- package/src/compiler/builtin/number.gwell +20 -0
- package/src/compiler/builtin/string.gwell +48 -0
- package/src/compiler/builtin/whitespace.gwell +10 -0
- package/src/compiler/builtin.json +1 -0
- package/src/compiler/compiler.ts +45 -24
- package/src/compiler/gwell.gwell +283 -0
- package/src/compiler/gwell.js +557 -0
- package/src/index.ts +2 -1
- package/src/parser/algorithms/cyk.ts +1 -1
- package/src/parser/algorithms/earley.ts +10 -10
- package/src/parser/algorithms/lrk/algorithm.ts +36 -0
- package/src/parser/algorithms/lrk/bimap.ts +17 -0
- package/src/parser/algorithms/lrk/canonical-collection.ts +79 -0
- package/src/parser/algorithms/lrk/closure.ts +37 -0
- package/src/parser/algorithms/lrk/stack.ts +53 -0
- package/src/parser/algorithms/lrk/state.ts +10 -0
- package/src/parser/parser.ts +5 -5
- package/src/typings.ts +1 -0
- package/src/utility/monarch.ts +36 -0
- package/src/parser/algorithms/lr.ts +0 -74
package/src/parser/algorithms/lrk/bimap.ts
ADDED
@@ -0,0 +1,17 @@
+ export class BiMap<T>{
+     private map: Map<T, number> = new Map();
+     private items: T[] = [];
+
+     id(ref: T) {
+         if (!this.map.has(ref)) {
+             this.map.set(ref, this.items.length);
+             this.items.push(ref);
+         }
+
+         return this.map.get(ref);
+     }
+
+     fetch(index: number) {
+         return this.items[index];
+     }
+ }
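For orientation only (not part of the published diff): the new BiMap hands each distinct reference the next free numeric id and can translate back, which the LR construction below uses to number rules and terminals. A minimal sketch:

const names = new BiMap<string>();
const a = names.id("expression");      // 0 — first time this value is registered
const b = names.id("term");            // 1
const again = names.id("expression");  // still 0 — already registered
console.log(a === again);              // true
console.log(names.fetch(b));           // "term"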
package/src/parser/algorithms/lrk/canonical-collection.ts
ADDED
@@ -0,0 +1,79 @@
+ import { GrammarRule, GrammarRuleSymbol, LanguageDefinition } from "../../../typings";
+ import { ParserUtility } from "../../parser";
+ import { BiMap } from "./bimap";
+ import { ClosureBuilder } from "./closure";
+ import { State } from "./state";
+
+ export class CanonicalCollection {
+     states: Map<string, State> = new Map();
+     rules: BiMap<GrammarRule> = new BiMap();
+     terminals: BiMap<GrammarRuleSymbol> = new BiMap();
+
+     private closure: ClosureBuilder;
+     constructor(
+         public grammar: LanguageDefinition['grammar']
+     ) {
+         const augmented = {
+             name: Symbol() as unknown as string,
+             symbols: [grammar.start]
+         }
+         grammar['rules'][augmented.name] = [augmented];
+         this.closure = new ClosureBuilder(grammar);
+         this.rules.id(augmented);
+         this.addState(grammar['rules'][augmented.name][0], 0);
+         this.linkStates('0.0');
+     }
+
+     private addState(rule: GrammarRule, dot: number) {
+         const id = this.getStateId(rule, dot);
+         if (this.states.has(id))
+             return;
+
+         const state: State = {
+             items: [],
+             isFinal: false,
+             actions: new Map(),
+             goto: new Map(),
+             reduce: null,
+             rule: rule
+         }
+
+         state.items.push({ rule, dot });
+         if (rule.symbols.length == dot)
+             state.isFinal = true;
+
+         this.states.set(id, state);
+
+         state.items.push(...this.closure.get(rule.symbols[dot] as string))
+
+         if (!state.isFinal)
+             for (const { rule, dot } of state.items) {
+                 this.addState(rule, dot + 1);
+             }
+     }
+
+     private linkStates(id: string, completed: Set<string> = new Set()) {
+         completed.add(id);
+         const state = this.states.get(id);
+         if (!state.isFinal) {
+             for (const { rule, dot } of state.items) {
+                 const symbol = rule.symbols[dot];
+                 const itemStateId = this.getStateId(rule, dot + 1);
+                 if (ParserUtility.SymbolIsTerminal(symbol) && typeof symbol != 'symbol') {
+                     state.actions.set(symbol, itemStateId);
+                 } else {
+                     state.goto.set(symbol, itemStateId);
+                 }
+
+                 if (!completed.has(itemStateId))
+                     this.linkStates(itemStateId, completed);
+             }
+         } else {
+             state.reduce = this.rules.id(state.rule);
+         }
+     }
+
+     private getStateId(rule: GrammarRule, dot: number) {
+         return this.rules.id(rule) + '.' + dot;
+     }
+ }
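Illustration only, not from the package: building the canonical collection for a tiny hypothetical grammar. The grammar shape used here (a start symbol plus a rules map of { name, symbols } productions, with non-string symbols treated as terminals) is read off the constructor and ParserUtility.SymbolIsTerminal, so treat the exact LanguageDefinition fields as an assumption.

// Hypothetical grammar: S -> "a" S | "a"
const grammar: any = {
    start: 'S',
    rules: {
        S: [
            { name: 'S', symbols: [{ literal: 'a' }, 'S'] },
            { name: 'S', symbols: [{ literal: 'a' }] },
        ],
    },
};

const collection = new CanonicalCollection(grammar);
console.log(collection.states.size);       // item states discovered from the augmented start rule
console.log(collection.states.get('0.0')); // the start state, as linked by linkStates('0.0')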
package/src/parser/algorithms/lrk/closure.ts
ADDED
@@ -0,0 +1,37 @@
+ import { GrammarRule, GrammarRuleSymbol, LanguageDefinition } from "../../../typings"
+ import { ParserUtility } from "../../parser"
+
+ export class ClosureBuilder {
+     constructor(
+         private grammar: LanguageDefinition['grammar'],
+     ) { }
+
+     get(rule: string) {
+         const closure: RuleClosure = { items: [], visited: new Set() };
+         this.addClosure(closure, rule);
+         return closure.items;
+     }
+
+     private addClosure(closure: RuleClosure, symbol: GrammarRuleSymbol) {
+         if (!ParserUtility.SymbolIsTerminal(symbol)) {
+             const key = symbol as string;
+             if (!(closure.visited.has(key))) {
+                 closure.visited.add(key);
+
+                 const rules = this.grammar.rules[key];
+                 for (const rule of rules) {
+                     closure.items.push({ rule, dot: 0 })
+                     this.addClosure(closure, rule.symbols[0]);
+                 }
+             }
+         }
+     }
+ }
+
+ interface RuleClosure {
+     items: {
+         rule: GrammarRule,
+         dot: number,
+     }[]
+     visited: Set<GrammarRuleSymbol>;
+ }
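Likewise illustrative: asking the closure builder for a non-terminal returns every production of that non-terminal, and recursively of any non-terminal that can begin it, with the dot at position 0 (reusing the hypothetical grammar from the sketch above).

const closureBuilder = new ClosureBuilder(grammar);
const items = closureBuilder.get('S');
// items: [{ rule: S -> "a" S, dot: 0 }, { rule: S -> "a", dot: 0 }]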
package/src/parser/algorithms/lrk/stack.ts
ADDED
@@ -0,0 +1,53 @@
+ import { GrammarRule, GrammarRuleSymbol } from "../../../typings";
+ import { State } from "./state";
+
+ export class LRStack {
+
+     stack: LRStackItem[] = [];
+
+     get current() {
+         return this.stack[this.stack.length - 1];
+     }
+
+     get previous() {
+         return this.stack[this.stack.length - 2];
+     }
+
+
+     shift(state: State) {
+         this.current.state = state;
+     }
+
+     reduce(rule: GrammarRule) {
+         const n = LRStack.NewItem();
+         const l = rule.symbols.length;
+         n.children = this.stack.splice(l * -1, l);
+         n.children.forEach(v => delete v.state);
+         n.rule = rule;
+         n.symbol = rule.name;
+         this.stack.push(n);
+     }
+
+     append(symbol: GrammarRuleSymbol) {
+         this.stack.push(LRStack.NewItem())
+         this.current.symbol = symbol;
+     }
+
+     static NewItem(): LRStackItem {
+         return {
+             children: [],
+             state: null,
+             symbol: null,
+             rule: null,
+             value: null
+         }
+     }
+ }
+
+ interface LRStackItem {
+     children: LRStackItem[];
+     state: State;
+     symbol: GrammarRuleSymbol;
+     rule: GrammarRule;
+     value: any;
+ }
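A sketch of how the stack is meant to be driven (illustration only; it reuses the hypothetical S -> "a" S rule from above): append pushes a matched symbol, shift records the automaton state reached, and reduce collapses the top entries of a completed rule into a single node.

const stack = new LRStack();
// Push the two right-hand-side symbols of S -> "a" S.
// In a real run, stack.shift(nextState) would follow each append.
stack.append({ literal: 'a' });
stack.append('S');

const rule = grammar.rules.S[0];            // S -> "a" S
stack.reduce(rule);
console.log(stack.current.symbol);          // 'S'
console.log(stack.current.children.length); // 2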
package/src/parser/algorithms/lrk/state.ts
ADDED
@@ -0,0 +1,10 @@
+ import { GrammarRule, GrammarRuleSymbol } from "../../../typings";
+
+ export interface State {
+     items: { rule: GrammarRule, dot: number }[];
+     isFinal: boolean;
+     actions: Map<GrammarRuleSymbol, string>;
+     goto: Map<GrammarRuleSymbol, string>;
+     reduce: number;
+     rule: GrammarRule
+ }
package/src/parser/parser.ts
CHANGED
@@ -4,12 +4,12 @@ import { TokenBuffer } from "../lexers/token-buffer";
  import { GrammarRule, GrammarRuleSymbol, LanguageDefinition, LexerToken, ParserAlgorithm } from "../typings";
  import { CYK } from "./algorithms/cyk";
  import { Earley } from "./algorithms/earley";
- import {
+ import { LRK } from "./algorithms/lrk/algorithm";

  const ParserRegistry: { [key: string]: ParserAlgorithm } = {
      earley: Earley,
      cyk: CYK,
-
+     lr0: LRK
  }

  export function Parse(language: LanguageDefinition, input: string, options?: ParserOptions) {
@@ -42,9 +42,9 @@ export class Parser {
  }


- export class ParserUtility {
+ export abstract class ParserUtility {

- static
+     static SymbolMatchesToken(symbol: GrammarRuleSymbol, token: LexerToken) {
          if (typeof symbol === 'string')
              throw 'Attempted to match token against non-terminal';
          if (typeof symbol == 'function')
@@ -59,7 +59,7 @@ export class ParserUtility {
          return symbol.literal === token.value;
      }

- static SymbolIsTerminal
+     static SymbolIsTerminal(symbol: GrammarRuleSymbol) {
          return typeof symbol != 'string';
      }

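For reference, the two static helpers as they are used elsewhere in this diff (illustration only; the token is narrowed to its value field here): string symbols name non-terminals, everything else counts as a terminal and can be matched against a lexer token.

ParserUtility.SymbolIsTerminal('expression');      // false — strings are non-terminals
ParserUtility.SymbolIsTerminal({ literal: 'if' }); // true  — any non-string symbol is terminal

const token: any = { value: 'if' };                // hypothetical lexer token
ParserUtility.SymbolMatchesToken({ literal: 'if' }, token); // expected true for a matching literal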
package/src/utility/monarch.ts
ADDED
@@ -0,0 +1,36 @@
+ import { LexerConfig } from "../typings";
+
+ export function CreateMonarchTokenizer(lexer: LexerConfig) {
+     const tokenizer: any = {}; // languages.IMonarchLanguage['tokenizer']
+     const { start, states } = lexer;
+     for (const key in states) {
+         const { name, rules } = states[key];
+         tokenizer[name] = [];
+         for (const rule of rules) {
+             if ('import' in rule) {
+                 for (const i of rule.import) {
+                     tokenizer[name].push({ include: i })
+                 }
+             } else if ('pop' in rule) {
+                 tokenizer[name].push([TransformWhen(rule.when), { token: rule.highlight || 'source', next: '@pop' }])
+             } else if ('goto' in rule) {
+                 tokenizer[name].push([TransformWhen(rule.when), { token: rule.highlight || 'source', next: '@' + rule.goto }])
+             } else if ('set' in rule) {
+                 tokenizer[name].push([TransformWhen(rule.when), { token: rule.highlight || 'source', switchTo: '@' + rule.set }])
+             } else if ('inset' in rule) {
+                 tokenizer[name].push([TransformWhen(rule.when), { token: rule.highlight || 'source', next: '@push' }])
+             } else if ('when' in rule) {
+                 tokenizer[name].push([TransformWhen(rule.when), { token: rule.highlight || 'source' }])
+             }
+         }
+     }
+     return { start, tokenizer };
+ }
+
+ function TransformWhen(obj) {
+     return typeof obj == 'string' ? new RegExp(RegexEscape(obj)) : new RegExp(obj.regex, obj.flags);
+ }
+
+ function RegexEscape(string) {
+     return string.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&')
+ }
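A sketch of feeding the helper a lexer configuration. The field names used here (start, states keyed by name, rules carrying when/highlight/goto/pop) are assumptions read off the code above rather than a documented LexerConfig schema.

// Hypothetical lexer config shaped the way CreateMonarchTokenizer reads it.
const lexer: any = {
    start: 'main',
    states: {
        main: {
            name: 'main',
            rules: [
                { when: { regex: '\\d+', flags: '' }, highlight: 'number' },
                { when: '"', highlight: 'string', goto: 'string' },
            ],
        },
        string: {
            name: 'string',
            rules: [
                { when: '"', highlight: 'string', pop: true },
                { when: { regex: '[^"]+', flags: '' }, highlight: 'string' },
            ],
        },
    },
};

const monarch = CreateMonarchTokenizer(lexer);
// monarch.start names the initial state; monarch.tokenizer is in Monaco's Monarch format.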
package/src/parser/algorithms/lr.ts
DELETED
@@ -1,74 +0,0 @@
- import type { TokenBuffer } from "../../lexers/token-buffer";
- import type { LanguageDefinition } from "../../typings";
- import type { GrammarRule, GrammarRuleSymbol, LRState } from "../../typings";
- import { ParserUtility } from "../parser";
-
- export function LR(language: LanguageDefinition & { tokens: TokenBuffer }, _options = {}) {
-     const { lr, tokens } = language;
-     const { table } = lr;
-     const stack = new LRStack();
-     stack.push({ state: table['0.0'] });
-
-     let token;
-
-     // eslint-disable-next-line no-cond-assign
-     while (token = tokens.next()) {
-         for (const { symbol, next } of stack.current.state.actions) {
-             if (ParserUtility.TokenMatchesSymbol(token, symbol)) {
-                 stack.push({ symbol, state: table[next], value: token });
-                 break;
-             }
-         }
-
-         while (stack.current.state?.isFinal) {
-             const rule = stack.current.state.reduce;
-             stack.reduce(rule);
-             stack.current.value = ParserUtility.PostProcess(rule, stack.current.children.map(v => v.value));
-             const s = stack.previous?.state.goto[rule.name];
-             stack.shift(table[s]);
-         }
-     }
-
-     return { results: [stack.current.value] }
- }
-
-
- class LRStack {
-
-     stack: LRStackItem[] = [];
-
-     get current() {
-         return this.stack[this.stack.length - 1];
-     }
-
-     get previous() {
-         return this.stack[this.stack.length - 2];
-     }
-
-     shift(state: LRState) {
-         this.current.state = state;
-     }
-
-     reduce(rule: GrammarRule) {
-         const n = new LRStackItem();
-         const l = rule.symbols.length;
-         n.children = this.stack.splice(l * -1, l);
-         n.children.forEach(v => delete v.state);
-         n.rule = rule;
-         n.symbol = rule.name;
-         this.stack.push(n);
-     }
-
-     push(item: Partial<LRStackItem>) {
-         this.stack.push(new LRStackItem());
-         Object.assign(this.current, item);
-     }
- }
-
- class LRStackItem {
-     children: LRStackItem[] = [];
-     state: LRState;
-     symbol: GrammarRuleSymbol;
-     rule: GrammarRule;
-     value: any;
- }