@chr33s/pdf-dfa 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -0
- package/dfa.d.ts +44 -0
- package/dist/compile.d.ts +6 -0
- package/dist/compile.js +22 -0
- package/dist/compile.js.map +1 -0
- package/dist/dfa.d.ts +16 -0
- package/dist/dfa.js +81 -0
- package/dist/dfa.js.map +1 -0
- package/dist/grammar.d.ts +11 -0
- package/dist/grammar.js +1266 -0
- package/dist/grammar.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/dist/nodes.d.ts +113 -0
- package/dist/nodes.js +241 -0
- package/dist/nodes.js.map +1 -0
- package/dist/state-machine.d.ts +29 -0
- package/dist/state-machine.js +71 -0
- package/dist/state-machine.js.map +1 -0
- package/dist/symbol-table.d.ts +17 -0
- package/dist/symbol-table.js +64 -0
- package/dist/symbol-table.js.map +1 -0
- package/dist/utils.d.ts +12 -0
- package/dist/utils.js +34 -0
- package/dist/utils.js.map +1 -0
- package/package.json +41 -0
- package/scripts/build-grammar.ts +33 -0
- package/src/compile.ts +31 -0
- package/src/dfa.ts +104 -0
- package/src/grammar.js +1312 -0
- package/src/grammar.peg +72 -0
- package/src/index.ts +9 -0
- package/src/nodes.ts +308 -0
- package/src/state-machine.ts +94 -0
- package/src/symbol-table.ts +78 -0
- package/src/utils.ts +38 -0
- package/test/compile.test.ts +131 -0
- package/test/dfa.test.ts +87 -0
- package/test/nodes.test.ts +324 -0
- package/test/parse-build.test.ts +50 -0
- package/test/state-machine.test.ts +132 -0
- package/test/symbol-table.test.ts +69 -0
- package/test/utils.test.ts +108 -0
- package/tsconfig.json +16 -0
- package/tsconfig.test.json +8 -0
- package/tsconfig.typecheck.json +16 -0
- package/vitest.config.ts +8 -0
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Returns a new set representing the union of a and b.
|
|
3
|
+
*/
|
|
4
|
+
export declare function union<T>(a: Set<T>, b: Iterable<T>): Set<T>;
|
|
5
|
+
/**
|
|
6
|
+
* Adds every item yielded by source to target, mutating target in place.
|
|
7
|
+
*/
|
|
8
|
+
export declare function addAll<T>(target: Set<T>, source: Iterable<T>): void;
|
|
9
|
+
/**
|
|
10
|
+
* Returns whether two sets are equal
|
|
11
|
+
*/
|
|
12
|
+
export declare function equal<T>(a: Set<T>, b: Set<T>): boolean;
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Builds the union of two collections without modifying either input.
 *
 * The result is a fresh Set seeded with the members of `a`, to which every
 * item yielded by `b` is then added (so members of `a` come first in
 * iteration order).
 */
export function union(a, b) {
  const merged = new Set(a);
  for (const element of b) {
    merged.add(element);
  }
  return merged;
}
|
|
9
|
+
/**
 * Inserts every item yielded by `source` into `target`.
 *
 * `target` is mutated in place; nothing is returned.
 */
export function addAll(target, source) {
  for (const entry of source) {
    target.add(entry);
  }
}
|
|
17
|
+
/**
 * Tests whether two sets contain exactly the same members.
 *
 * The same Set instance is trivially equal to itself. Otherwise the sets are
 * equal when their sizes match and every member of `a` is also in `b`.
 */
export function equal(a, b) {
  if (a !== b) {
    // Different sizes can never be equal; skip the membership scan.
    if (a.size !== b.size) {
      return false;
    }

    for (const member of a) {
      if (!b.has(member)) {
        return false;
      }
    }
  }

  return true;
}
|
|
34
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,UAAU,KAAK,CAAI,CAAS,EAAE,CAAc;IAChD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAClB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,MAAM,CAAI,MAAc,EAAE,MAAmB;IAC3D,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,KAAK,CAAI,CAAS,EAAE,CAAS;IAC3C,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACZ,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACtB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,CAAC,EAAE,CAAC;QACrB,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@chr33s/pdf-dfa",
|
|
3
|
+
"version": "5.0.0",
|
|
4
|
+
"description": "Deterministic finite automata compiler",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"sideEffects": false,
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"default": "./dist/index.js"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"scripts": {
|
|
14
|
+
"build": "npm run clean && npm run generate && tsc",
|
|
15
|
+
"clean": "rm -rf dist src/grammar.js",
|
|
16
|
+
"generate": "node scripts/build-grammar.ts",
|
|
17
|
+
"test": "vitest run",
|
|
18
|
+
"typecheck": "tsc --project tsconfig.typecheck.json"
|
|
19
|
+
},
|
|
20
|
+
"repository": {
|
|
21
|
+
"type": "git",
|
|
22
|
+
"url": "https://github.com/chr33s/pdf.git"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"state",
|
|
26
|
+
"machine",
|
|
27
|
+
"compiler"
|
|
28
|
+
],
|
|
29
|
+
"author": "Devon Govett <devongovett@gmail.com>",
|
|
30
|
+
"bugs": {
|
|
31
|
+
"url": "https://github.com/chr33s/pdf/issues"
|
|
32
|
+
},
|
|
33
|
+
"license": "MIT",
|
|
34
|
+
"homepage": "https://github.com/chr33s/pdf",
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/node": "24.10.1",
|
|
37
|
+
"peggy": "5.0.6",
|
|
38
|
+
"typescript": "5.9.3",
|
|
39
|
+
"vitest": "4.0.15"
|
|
40
|
+
}
|
|
41
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { readFile, writeFile } from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import peggy from "peggy";
|
|
5
|
+
|
|
6
|
+
// Locations are derived from this script's own path so the build works from any cwd.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
// The script lives one level below the package root.
const projectRoot = path.resolve(scriptDir, "..");
const srcDir = path.join(projectRoot, "src");
// Input grammar and the generated parser module that is written next to it.
const grammarPath = path.join(srcDir, "grammar.peg");
const outputPath = path.join(srcDir, "grammar.js");
|
|
12
|
+
|
|
13
|
+
/**
 * Regenerates the parser module from the PEG grammar.
 *
 * Reads the grammar at `grammarPath`, asks peggy for ES-module parser source
 * (with the `cache` option enabled), prepends a banner that disables type
 * checking and marks the file as generated, writes the result to `outputPath`,
 * and logs the output path relative to the project root.
 */
async function buildGrammar(): Promise<void> {
  const grammarText = await readFile(grammarPath, "utf8");

  const generatedSource = peggy.generate(grammarText, {
    cache: true,
    output: "source",
    format: "es",
  });

  // The second banner line carries its own trailing newline so the parser
  // source starts on a fresh line.
  const header = [
    "// @ts-nocheck",
    "// This file is generated by scripts/build-grammar.ts. Do not edit by hand.\n",
  ].join("\n");

  await writeFile(outputPath, `${header}${generatedSource}`);

  const relativeOutput = path.relative(projectRoot, outputPath);
  console.log(`Generated ${relativeOutput}`);
}

/** Logs a build failure and sets a non-zero exit code for the process. */
function reportFailure(error: unknown): void {
  console.error("Failed to build grammar:", error);
  process.exitCode = 1;
}

buildGrammar().catch(reportFailure);
|
package/src/compile.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import buildDFA, { DFAState } from "./dfa.js";
|
|
2
|
+
import { parse as parseGrammar } from "./grammar.js";
|
|
3
|
+
import * as nodes from "./nodes.js";
|
|
4
|
+
import StateMachine, { StateMachineConfig } from "./state-machine.js";
|
|
5
|
+
import SymbolTable from "./symbol-table.js";
|
|
6
|
+
|
|
7
|
+
// Caller-supplied symbol definitions; `parse` hands this object to SymbolTable unchanged.
// NOTE(review): presumably maps symbol names to their numeric input values — confirm against SymbolTable.
export type ExternalSymbols = Record<string, number>;
|
|
8
|
+
|
|
9
|
+
/**
 * Parses grammar source text into a symbol table.
 *
 * The generated parser is invoked with the `nodes` module as an option, and
 * the resulting AST is wrapped in a SymbolTable together with the external
 * symbols.
 *
 * @param source - Grammar text to parse.
 * @param externalSymbols - Extra symbols passed to the SymbolTable; defaults to none.
 * @returns The symbol table built from the parsed grammar.
 */
export function parse(source: string, externalSymbols: ExternalSymbols = {}): SymbolTable {
  return new SymbolTable(parseGrammar(source, { nodes }), externalSymbols);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Compiles a symbol table into a state machine.
 *
 * A DFA is built from the table's `main` expression over `size` symbols, and
 * each state's typed-array transitions and tag set are converted to plain
 * arrays for the state machine configuration.
 *
 * @param symbolTable - Parsed grammar symbols.
 * @returns A state machine constructed from the DFA tables.
 */
export function build(symbolTable: SymbolTable): StateMachine {
  const dfaStates: DFAState[] = buildDFA(symbolTable.main, symbolTable.size);

  // One entry per DFA state in each table, all sharing the same index.
  const stateTable = dfaStates.map((dfaState) => Array.from(dfaState.transitions));
  const accepting = dfaStates.map((dfaState) => dfaState.accepting);
  const tags = dfaStates.map((dfaState) => Array.from(dfaState.tags));

  const config: StateMachineConfig = { stateTable, accepting, tags };
  return new StateMachine(config);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Compiles grammar source text straight to a state machine.
 *
 * Equivalent to calling `parse` and then `build` on its result.
 *
 * @param source - Grammar text to compile.
 * @param externalSymbols - Extra symbols forwarded to `parse`; defaults to none.
 * @returns The compiled state machine.
 */
export default function compile(
  source: string,
  externalSymbols: ExternalSymbols = {},
): StateMachine {
  const symbolTable = parse(source, externalSymbols);
  return build(symbolTable);
}
|
package/src/dfa.ts
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { Concatenation, EndMarker, ExpressionNode, Literal, PositionNode, Tag } from "./nodes.js";
|
|
2
|
+
import { addAll, equal } from "./utils.js";
|
|
3
|
+
|
|
4
|
+
// Shared end-of-input sentinel: buildDFA appends it to the expression tree, and a
// state is accepting exactly when its position set contains this instance.
const END_MARKER = new EndMarker();
|
|
5
|
+
|
|
6
|
+
/** One state of the DFA produced by `buildDFA`. */
export interface DFAState {
  /** Positions of the expression tree that this state represents. */
  positions: Set<PositionNode>;
  /** Names of the Tag positions contained in `positions`. */
  tags: Set<string>;
  /** True when `positions` contains the end marker. */
  accepting: boolean;
  /**
   * Next-state index for each input symbol, indexed by symbol number.
   * Entries that are never set stay 0, which is the index of the fail state.
   */
  transitions: Uint16Array;
  /** Construction bookkeeping: set once `buildDFA` has processed the state. */
  marked: boolean;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Builds a DFA directly from a regular-expression syntax tree.
 *
 * This is an implementation of the direct regular expression to DFA algorithm described
 * in section 3.9.5 of "Compilers: Principles, Techniques, and Tools" by Aho,
 * Lam, Sethi, and Ullman. http://dragonbook.stanford.edu
 * There is a PDF of the book here:
 * http://www.informatik.uni-bremen.de/agbkb/lehre/ccfl/Material/ALSUdragonbook.pdf
 *
 * @param root - Expression tree to compile; it is concatenated with the end
 *   marker before followpos is calculated.
 * @param numSymbols - Size of the input alphabet. Symbols are the integers
 *   `0 .. numSymbols - 1`, and every state gets a transition row of this length.
 * @returns The DFA states. Index 0 is the fail state (empty position set, and
 *   the target of every transition that is never set); index 1 is the initial state.
 * @throws RangeError if the DFA needs more states than a 16-bit transition
 *   entry can address.
 */
export default function buildDFA(root: ExpressionNode, numSymbols: number): DFAState[] {
  // Transitions are stored in a Uint16Array, so the largest state index that
  // can be written without wrapping is 0xffff.
  const MAX_STATE_INDEX = 0xffff;

  const augmentedRoot = new Concatenation(root, END_MARKER);
  augmentedRoot.calcFollowpos();

  const failState = new State(new Set<PositionNode>(), numSymbols);
  const initialState = new State(augmentedRoot.firstpos, numSymbols);
  const dstates: State[] = [failState, initialState];

  // while there is an unmarked state S in dstates
  //
  // New states are only ever appended and are processed in order, so the
  // unmarked states always form the tail of the list; a cursor visits them in
  // the same order as rescanning for the first unmarked state each time.
  // The fail state at index 0 is deliberately never processed.
  for (let cursor = 1; cursor < dstates.length; cursor++) {
    const s = dstates[cursor];

    // mark S
    s.marked = true;

    // for each input symbol a
    for (let a = 0; a < numSymbols; a++) {
      // let U be the union of followpos(p) for all
      // p in S that correspond to a
      const u = new Set<PositionNode>();
      for (const p of s.positions) {
        if (p instanceof Literal && p.value === a) {
          addAll(u, p.followpos);
        }
      }

      // No position matches this symbol: leave the transition at 0 (fail state).
      if (u.size === 0) {
        continue;
      }

      // if U is not in dstates
      let ux = -1;
      for (let i = 0; i < dstates.length; i++) {
        if (equal(u, dstates[i].positions)) {
          ux = i;
          break;
        }
      }

      if (ux === -1) {
        // Storing an index above 0xffff would silently wrap modulo 2^16 and
        // corrupt the table, so fail loudly instead.
        if (dstates.length > MAX_STATE_INDEX) {
          throw new RangeError(
            `DFA requires more than ${MAX_STATE_INDEX + 1} states, which exceeds the 16-bit transition table limit`,
          );
        }

        // Add U as an unmarked state to dstates
        dstates.push(new State(u, numSymbols));
        ux = dstates.length - 1;
      }

      s.transitions[a] = ux;
    }
  }

  return dstates;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Concrete DFA state used during construction.
 *
 * Everything except `marked` is fixed at construction time and derived from
 * the position set.
 */
class State implements DFAState {
  readonly positions: Set<PositionNode>;
  readonly transitions: Uint16Array;
  readonly accepting: boolean;
  marked: boolean;
  readonly tags: Set<string>;

  /**
   * @param positions - Positions represented by this state.
   * @param len - Number of input symbols, i.e. the length of the transition row.
   */
  constructor(positions: Set<PositionNode>, len: number) {
    this.positions = positions;
    // Zero-initialised row: every symbol starts out pointing at state 0.
    this.transitions = new Uint16Array(len);
    this.accepting = positions.has(END_MARKER);
    this.marked = false;

    // Collect the names of all Tag positions contained in this state.
    const tagNames = new Set<string>();
    positions.forEach((position) => {
      if (position instanceof Tag) {
        tagNames.add(position.name);
      }
    });
    this.tags = tagNames;
  }
}
|