@chr33s/pdf-dfa 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +91 -0
  2. package/dfa.d.ts +44 -0
  3. package/dist/compile.d.ts +6 -0
  4. package/dist/compile.js +22 -0
  5. package/dist/compile.js.map +1 -0
  6. package/dist/dfa.d.ts +16 -0
  7. package/dist/dfa.js +81 -0
  8. package/dist/dfa.js.map +1 -0
  9. package/dist/grammar.d.ts +11 -0
  10. package/dist/grammar.js +1266 -0
  11. package/dist/grammar.js.map +1 -0
  12. package/dist/index.d.ts +3 -0
  13. package/dist/index.js +4 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/nodes.d.ts +113 -0
  16. package/dist/nodes.js +241 -0
  17. package/dist/nodes.js.map +1 -0
  18. package/dist/state-machine.d.ts +29 -0
  19. package/dist/state-machine.js +71 -0
  20. package/dist/state-machine.js.map +1 -0
  21. package/dist/symbol-table.d.ts +17 -0
  22. package/dist/symbol-table.js +64 -0
  23. package/dist/symbol-table.js.map +1 -0
  24. package/dist/utils.d.ts +12 -0
  25. package/dist/utils.js +34 -0
  26. package/dist/utils.js.map +1 -0
  27. package/package.json +41 -0
  28. package/scripts/build-grammar.ts +33 -0
  29. package/src/compile.ts +31 -0
  30. package/src/dfa.ts +104 -0
  31. package/src/grammar.js +1312 -0
  32. package/src/grammar.peg +72 -0
  33. package/src/index.ts +9 -0
  34. package/src/nodes.ts +308 -0
  35. package/src/state-machine.ts +94 -0
  36. package/src/symbol-table.ts +78 -0
  37. package/src/utils.ts +38 -0
  38. package/test/compile.test.ts +131 -0
  39. package/test/dfa.test.ts +87 -0
  40. package/test/nodes.test.ts +324 -0
  41. package/test/parse-build.test.ts +50 -0
  42. package/test/state-machine.test.ts +132 -0
  43. package/test/symbol-table.test.ts +69 -0
  44. package/test/utils.test.ts +108 -0
  45. package/tsconfig.json +16 -0
  46. package/tsconfig.test.json +8 -0
  47. package/tsconfig.typecheck.json +16 -0
  48. package/vitest.config.ts +8 -0
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Returns a new set representing the union of a and b.
3
+ */
4
+ export declare function union<T>(a: Set<T>, b: Iterable<T>): Set<T>;
5
+ /**
6
+ * Adds every item of the iterable `source` to the set `target`, mutating `target` in place.
7
+ */
8
+ export declare function addAll<T>(target: Set<T>, source: Iterable<T>): void;
9
+ /**
10
+ * Returns whether two sets contain exactly the same items.
11
+ */
12
+ export declare function equal<T>(a: Set<T>, b: Set<T>): boolean;
package/dist/utils.js ADDED
@@ -0,0 +1,34 @@
1
/**
 * Builds a fresh Set containing every item of `a` followed by every item of `b`.
 * Neither argument is modified.
 */
export function union(a, b) {
  const merged = new Set(a);
  for (const item of b) {
    merged.add(item);
  }
  return merged;
}
9
/**
 * Inserts each item yielded by `source` into `target`.
 * `target` is mutated in place; nothing is returned.
 */
export function addAll(target, source) {
  for (const entry of source) {
    target.add(entry);
  }
}
17
/**
 * Tells whether `a` and `b` hold exactly the same items.
 * The same object is trivially equal to itself; otherwise the sizes must match
 * and every item of `a` must also be present in `b`.
 */
export function equal(a, b) {
  const sameReference = a === b;
  if (!sameReference) {
    if (a.size !== b.size) {
      return false;
    }
    for (const member of a) {
      if (!b.has(member)) {
        return false;
      }
    }
  }
  return true;
}
34
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,UAAU,KAAK,CAAI,CAAS,EAAE,CAAc;IAChD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAClB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,MAAM,CAAI,MAAc,EAAE,MAAmB;IAC3D,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,KAAK,CAAI,CAAS,EAAE,CAAS;IAC3C,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACZ,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACtB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,CAAC,EAAE,CAAC;QACrB,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "@chr33s/pdf-dfa",
3
+ "version": "5.0.0",
4
+ "description": "Deterministic finite automata compiler",
5
+ "type": "module",
6
+ "sideEffects": false,
7
+ "exports": {
8
+ ".": {
9
+ "types": "./dist/index.d.ts",
10
+ "default": "./dist/index.js"
11
+ }
12
+ },
13
+ "scripts": {
14
+ "build": "npm run clean && npm run generate && tsc",
15
+ "clean": "rm -rf dist src/grammar.js",
16
+ "generate": "node scripts/build-grammar.ts",
17
+ "test": "vitest run",
18
+ "typecheck": "tsc --project tsconfig.typecheck.json"
19
+ },
20
+ "repository": {
21
+ "type": "git",
22
+ "url": "https://github.com/chr33s/pdf.git"
23
+ },
24
+ "keywords": [
25
+ "state",
26
+ "machine",
27
+ "compiler"
28
+ ],
29
+ "author": "Devon Govett <devongovett@gmail.com>",
30
+ "bugs": {
31
+ "url": "https://github.com/chr33s/pdf/issues"
32
+ },
33
+ "license": "MIT",
34
+ "homepage": "https://github.com/chr33s/pdf",
35
+ "devDependencies": {
36
+ "@types/node": "24.10.1",
37
+ "peggy": "5.0.6",
38
+ "typescript": "5.9.3",
39
+ "vitest": "4.0.15"
40
+ }
41
+ }
@@ -0,0 +1,33 @@
1
+ import { readFile, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ import peggy from "peggy";
5
+
6
// Locate this script on disk so every path below is independent of the
// directory the command is launched from.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The project root is the parent of the directory that contains this script.
const projectRoot = path.resolve(__dirname, "..");
const srcDir = path.join(projectRoot, "src");

// Input grammar and the generated parser module, both inside `src`.
const grammarPath = path.join(srcDir, "grammar.peg");
const outputPath = path.join(srcDir, "grammar.js");
12
+
13
/**
 * Regenerates the parser module from the PEG grammar.
 *
 * Reads the grammar text at `grammarPath`, asks peggy for ES-module parser
 * source, prefixes it with a two-line banner and writes the result to
 * `outputPath`, then logs the written path relative to the project root.
 */
async function buildGrammar(): Promise<void> {
  const grammarText = await readFile(grammarPath, "utf8");
  const generatedParser = peggy.generate(grammarText, {
    cache: true,
    output: "source",
    format: "es",
  });

  // The second line carries its own trailing newline, so the joined banner
  // ends with a line break directly before the generated source.
  const bannerLines = [
    "// @ts-nocheck",
    "// This file is generated by scripts/build-grammar.ts. Do not edit by hand.\n",
  ];
  const fileContents = bannerLines.join("\n") + generatedParser;

  await writeFile(outputPath, fileContents);

  const displayPath = path.relative(projectRoot, outputPath);
  console.log(`Generated ${displayPath}`);
}
29
+
30
/**
 * Logs a build failure and records exit code 1 on the process instead of
 * exiting immediately.
 */
function reportFailure(error: unknown): void {
  console.error("Failed to build grammar:", error);
  process.exitCode = 1;
}

// Script entry point: run the build and route any rejection to the reporter.
buildGrammar().catch(reportFailure);
package/src/compile.ts ADDED
@@ -0,0 +1,31 @@
1
+ import buildDFA, { DFAState } from "./dfa.js";
2
+ import { parse as parseGrammar } from "./grammar.js";
3
+ import * as nodes from "./nodes.js";
4
+ import StateMachine, { StateMachineConfig } from "./state-machine.js";
5
+ import SymbolTable from "./symbol-table.js";
6
+
7
/**
 * Mapping from a symbol name to a numeric value. It is handed unchanged to
 * `SymbolTable` together with the parsed grammar.
 */
export type ExternalSymbols = Record<string, number>;

/**
 * Parses grammar source text and wraps the resulting AST in a symbol table.
 *
 * @param source - Grammar text passed to the generated parser.
 * @param externalSymbols - Extra name-to-number entries forwarded to the
 *   `SymbolTable` constructor; defaults to an empty object.
 * @returns The `SymbolTable` built from the AST and `externalSymbols`.
 */
export function parse(source: string, externalSymbols: ExternalSymbols = {}): SymbolTable {
  // The node constructors are supplied to the generated parser as an option.
  const parserOptions = { nodes };
  return new SymbolTable(parseGrammar(source, parserOptions), externalSymbols);
}
13
+
14
/**
 * Converts a symbol table into a runnable state machine.
 *
 * Builds the DFA for `symbolTable.main` over `symbolTable.size` symbols and
 * copies each state's transitions, accepting flag and tags into the plain
 * arrays that make up the `StateMachine` configuration.
 *
 * @param symbolTable - Parsed grammar, as returned by `parse`.
 * @returns A `StateMachine` constructed from the DFA tables.
 */
export function build(symbolTable: SymbolTable): StateMachine {
  const dfaStates: DFAState[] = buildDFA(symbolTable.main, symbolTable.size);

  const stateTable: number[][] = [];
  const accepting: boolean[] = [];
  const tags: string[][] = [];

  // One pass over the states fills the three parallel tables in state order.
  for (const dfaState of dfaStates) {
    stateTable.push(Array.from(dfaState.transitions));
    accepting.push(dfaState.accepting);
    tags.push(Array.from(dfaState.tags));
  }

  const config: StateMachineConfig = { stateTable, accepting, tags };
  return new StateMachine(config);
}
25
+
26
/**
 * Compiles grammar source text straight to a state machine by running
 * `parse` and then `build`.
 *
 * @param source - Grammar text.
 * @param externalSymbols - Extra name-to-number entries forwarded to `parse`;
 *   defaults to an empty object.
 * @returns The `StateMachine` produced by `build`.
 */
export default function compile(
  source: string,
  externalSymbols: ExternalSymbols = {},
): StateMachine {
  const symbolTable = parse(source, externalSymbols);
  return build(symbolTable);
}
package/src/dfa.ts ADDED
@@ -0,0 +1,104 @@
1
+ import { Concatenation, EndMarker, ExpressionNode, Literal, PositionNode, Tag } from "./nodes.js";
2
+ import { addAll, equal } from "./utils.js";
3
+
4
/**
 * Single end-marker position appended to every expression by `buildDFA`.
 * A state whose position set contains it is flagged as accepting.
 */
const END_MARKER = new EndMarker();

/** One state of the automaton returned by `buildDFA`. */
export interface DFAState {
  /** Expression positions that this state stands for. */
  positions: Set<PositionNode>;
  /**
   * Indexed by input symbol; each entry is the index of the target state in
   * the array returned by `buildDFA`. Entries that are never assigned keep the
   * typed array's initial value 0, which is the index of the fail state.
   */
  transitions: Uint16Array;
  /** As populated by `State`: true when `positions` contains the end marker. */
  accepting: boolean;
  /** Set to true by `buildDFA` when it starts computing this state's transitions. */
  marked: boolean;
  /** As populated by `State`: names of the `Tag` positions found in `positions`. */
  tags: Set<string>;
}
13
+
14
/**
 * Builds a DFA directly from an expression tree.
 *
 * This is an implementation of the direct regular expression to DFA algorithm described
 * in section 3.9.5 of "Compilers: Principles, Techniques, and Tools" by Aho,
 * Lam, Sethi, and Ullman. http://dragonbook.stanford.edu
 * There is a PDF of the book here:
 * http://www.informatik.uni-bremen.de/agbkb/lehre/ccfl/Material/ALSUdragonbook.pdf
 *
 * @param root - Expression to compile. It is concatenated with the end marker
 *   before followpos is calculated.
 * @param numSymbols - Number of input symbols; symbols are the integers
 *   `0` to `numSymbols - 1`, and each state gets that many transition entries.
 * @returns The DFA states. Index 0 is the fail state (empty position set) and
 *   index 1 is the initial state; further states follow in discovery order.
 * @throws RangeError if a new state would get an index that does not fit in a
 *   16-bit transition entry.
 */
export default function buildDFA(root: ExpressionNode, numSymbols: number): DFAState[] {
  // Transitions live in a Uint16Array, which silently wraps larger values.
  // Refuse to create a state whose index could not be stored faithfully.
  const MAX_STATE_INDEX = 0xffff;

  const augmentedRoot = new Concatenation(root, END_MARKER);
  augmentedRoot.calcFollowpos();

  const failState = new State(new Set<PositionNode>(), numSymbols);
  const initialState = new State(augmentedRoot.firstpos, numSymbols);
  const dstates: State[] = [failState, initialState];

  // while there is an unmarked state S in dstates
  //
  // States are only ever appended and are processed in index order, so the
  // first unmarked state is always the one at the cursor. The fail state at
  // index 0 is deliberately never processed.
  for (let cursor = 1; cursor < dstates.length; cursor++) {
    const s = dstates[cursor];

    // mark S
    s.marked = true;

    // for each input symbol a
    for (let a = 0; a < numSymbols; a++) {
      // let U be the union of followpos(p) for all
      // p in S that correspond to a
      const u = new Set<PositionNode>();
      for (const p of s.positions) {
        if (p instanceof Literal && p.value === a) {
          addAll(u, p.followpos);
        }
      }

      // No position consumes this symbol: the entry stays 0 (fail state).
      if (u.size === 0) {
        continue;
      }

      // if U is not in dstates
      let ux = -1;
      for (let i = 0; i < dstates.length; i++) {
        if (equal(u, dstates[i].positions)) {
          ux = i;
          break;
        }
      }

      if (ux === -1) {
        // The new state would receive index dstates.length.
        if (dstates.length > MAX_STATE_INDEX) {
          throw new RangeError(
            `DFA requires more than ${MAX_STATE_INDEX + 1} states, which cannot be stored in 16-bit transitions`,
          );
        }

        // Add U as an unmarked state to dstates
        dstates.push(new State(u, numSymbols));
        ux = dstates.length - 1;
      }

      s.transitions[a] = ux;
    }
  }

  return dstates;
}
83
+
84
/**
 * Concrete `DFAState` used while the automaton is being constructed.
 * Everything except `marked` is fixed at construction time.
 */
class State implements DFAState {
  readonly positions: Set<PositionNode>;
  readonly transitions: Uint16Array;
  readonly accepting: boolean;
  marked: boolean;
  readonly tags: Set<string>;

  /**
   * @param positions - Position set this state stands for; stored as-is, not copied.
   * @param len - Number of transition entries to allocate, all initially 0.
   */
  constructor(positions: Set<PositionNode>, len: number) {
    // Collect the names of all tag positions up front.
    const tagNames = new Set<string>();
    for (const position of positions) {
      if (position instanceof Tag) {
        tagNames.add(position.name);
      }
    }

    this.positions = positions;
    this.transitions = new Uint16Array(len);
    // A state is accepting exactly when it contains the shared end marker.
    this.accepting = positions.has(END_MARKER);
    this.marked = false;
    this.tags = tagNames;
  }
}