ogopego 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ogopego-0.1.7/.gitignore +45 -0
- ogopego-0.1.7/CUT.md +194 -0
- ogopego-0.1.7/LICENSE +201 -0
- ogopego-0.1.7/PKG-INFO +130 -0
- ogopego-0.1.7/README.md +121 -0
- ogopego-0.1.7/SYNTAX.md +947 -0
- ogopego-0.1.7/api/api.go +167 -0
- ogopego-0.1.7/api/api_test.go +72 -0
- ogopego-0.1.7/api/pyapi.go +190 -0
- ogopego-0.1.7/cmd/cli/cli.go +131 -0
- ogopego-0.1.7/cmd/cli/cli_main.go +348 -0
- ogopego-0.1.7/cmd/cli/progress.go +160 -0
- ogopego-0.1.7/cmd/cli/pygmentize.go +19 -0
- ogopego-0.1.7/cmd/doc.go +5 -0
- ogopego-0.1.7/cmd/main.go +46 -0
- ogopego-0.1.7/doc.go +6 -0
- ogopego-0.1.7/go.mod +22 -0
- ogopego-0.1.7/go.sum +34 -0
- ogopego-0.1.7/grammar/calc.ebnf +38 -0
- ogopego-0.1.7/grammar/calc.json +347 -0
- ogopego-0.1.7/grammar/tatsu.ebnf +290 -0
- ogopego-0.1.7/grammar/tatsu.json +4096 -0
- ogopego-0.1.7/ogopego.go +16 -0
- ogopego-0.1.7/pkg/asjson/asjson.go +115 -0
- ogopego-0.1.7/pkg/asjson/asjson_test.go +178 -0
- ogopego-0.1.7/pkg/asjson/doc.go +4 -0
- ogopego-0.1.7/pkg/asjson/error.go +18 -0
- ogopego-0.1.7/pkg/config/config.go +143 -0
- ogopego-0.1.7/pkg/config/doc.go +6 -0
- ogopego-0.1.7/pkg/context/context.go +15 -0
- ogopego-0.1.7/pkg/context/ctx.go +98 -0
- ogopego-0.1.7/pkg/context/ctx_core.go +416 -0
- ogopego-0.1.7/pkg/context/ctx_test.go +277 -0
- ogopego-0.1.7/pkg/context/doc.go +4 -0
- ogopego-0.1.7/pkg/context/error.go +55 -0
- ogopego-0.1.7/pkg/context/memento.go +84 -0
- ogopego-0.1.7/pkg/context/memento_test.go +104 -0
- ogopego-0.1.7/pkg/context/memo.go +37 -0
- ogopego-0.1.7/pkg/context/memo_test.go +83 -0
- ogopego-0.1.7/pkg/context/state.go +100 -0
- ogopego-0.1.7/pkg/context/trace.go +203 -0
- ogopego-0.1.7/pkg/context/trace_test.go +225 -0
- ogopego-0.1.7/pkg/input/configure_test.go +91 -0
- ogopego-0.1.7/pkg/input/cursor.go +81 -0
- ogopego-0.1.7/pkg/input/cursor_str.go +449 -0
- ogopego-0.1.7/pkg/input/cursor_str_test.go +384 -0
- ogopego-0.1.7/pkg/input/doc.go +5 -0
- ogopego-0.1.7/pkg/input/input.go +17 -0
- ogopego-0.1.7/pkg/input/patterns.go +79 -0
- ogopego-0.1.7/pkg/peg/analysis.go +94 -0
- ogopego-0.1.7/pkg/peg/asjson_test.go +37 -0
- ogopego-0.1.7/pkg/peg/boot.go +58 -0
- ogopego-0.1.7/pkg/peg/boot_test.go +35 -0
- ogopego-0.1.7/pkg/peg/compile.go +631 -0
- ogopego-0.1.7/pkg/peg/doc.go +6 -0
- ogopego-0.1.7/pkg/peg/export.go +191 -0
- ogopego-0.1.7/pkg/peg/import.go +593 -0
- ogopego-0.1.7/pkg/peg/import_test.go +86 -0
- ogopego-0.1.7/pkg/peg/leftrec.go +224 -0
- ogopego-0.1.7/pkg/peg/link.go +98 -0
- ogopego-0.1.7/pkg/peg/link_test.go +199 -0
- ogopego-0.1.7/pkg/peg/link_validate.go +90 -0
- ogopego-0.1.7/pkg/peg/model.go +82 -0
- ogopego-0.1.7/pkg/peg/model_call.go +128 -0
- ogopego-0.1.7/pkg/peg/model_choice.go +59 -0
- ogopego-0.1.7/pkg/peg/model_choice_go.go +51 -0
- ogopego-0.1.7/pkg/peg/model_closure.go +27 -0
- ogopego-0.1.7/pkg/peg/model_constant.go +26 -0
- ogopego-0.1.7/pkg/peg/model_cut.go +20 -0
- ogopego-0.1.7/pkg/peg/model_dot.go +24 -0
- ogopego-0.1.7/pkg/peg/model_emptyclosure.go +18 -0
- ogopego-0.1.7/pkg/peg/model_eof.go +27 -0
- ogopego-0.1.7/pkg/peg/model_eol.go +26 -0
- ogopego-0.1.7/pkg/peg/model_fail.go +14 -0
- ogopego-0.1.7/pkg/peg/model_grammar.go +159 -0
- ogopego-0.1.7/pkg/peg/model_grammar_directives_test.go +54 -0
- ogopego-0.1.7/pkg/peg/model_group.go +16 -0
- ogopego-0.1.7/pkg/peg/model_join.go +54 -0
- ogopego-0.1.7/pkg/peg/model_lookahead.go +55 -0
- ogopego-0.1.7/pkg/peg/model_named.go +38 -0
- ogopego-0.1.7/pkg/peg/model_null.go +15 -0
- ogopego-0.1.7/pkg/peg/model_optional.go +31 -0
- ogopego-0.1.7/pkg/peg/model_override.go +39 -0
- ogopego-0.1.7/pkg/peg/model_pattern.go +23 -0
- ogopego-0.1.7/pkg/peg/model_repr.go +186 -0
- ogopego-0.1.7/pkg/peg/model_rule.go +99 -0
- ogopego-0.1.7/pkg/peg/model_ruleinclude.go +24 -0
- ogopego-0.1.7/pkg/peg/model_sequence.go +39 -0
- ogopego-0.1.7/pkg/peg/model_skipgroup.go +19 -0
- ogopego-0.1.7/pkg/peg/model_skipto.go +34 -0
- ogopego-0.1.7/pkg/peg/model_synth.go +16 -0
- ogopego-0.1.7/pkg/peg/model_token.go +26 -0
- ogopego-0.1.7/pkg/peg/model_void.go +16 -0
- ogopego-0.1.7/pkg/peg/node.go +135 -0
- ogopego-0.1.7/pkg/peg/node_test.go +247 -0
- ogopego-0.1.7/pkg/peg/nullability.go +103 -0
- ogopego-0.1.7/pkg/peg/optimize.go +115 -0
- ogopego-0.1.7/pkg/peg/parse_repeat.go +103 -0
- ogopego-0.1.7/pkg/peg/parse_test.go +558 -0
- ogopego-0.1.7/pkg/peg/peg.go +10 -0
- ogopego-0.1.7/pkg/peg/repr_gen.go +30 -0
- ogopego-0.1.7/pkg/peg/repr_pretty.go +382 -0
- ogopego-0.1.7/pkg/peg/repr_pretty_test.go +533 -0
- ogopego-0.1.7/pkg/peg/repr_rails.go +474 -0
- ogopego-0.1.7/pkg/tool/model_repr.go +327 -0
- ogopego-0.1.7/pkg/trees/doc.go +4 -0
- ogopego-0.1.7/pkg/trees/tree.go +204 -0
- ogopego-0.1.7/pkg/trees/tree_leaf.go +80 -0
- ogopego-0.1.7/pkg/trees/tree_list.go +34 -0
- ogopego-0.1.7/pkg/trees/tree_map.go +13 -0
- ogopego-0.1.7/pkg/trees/tree_named.go +59 -0
- ogopego-0.1.7/pkg/trees/tree_node.go +13 -0
- ogopego-0.1.7/pkg/trees/tree_test.go +279 -0
- ogopego-0.1.7/pkg/util/container/boundedmap.go +160 -0
- ogopego-0.1.7/pkg/util/container/boundedmap_test.go +213 -0
- ogopego-0.1.7/pkg/util/countlines.go +45 -0
- ogopego-0.1.7/pkg/util/countlines_test.go +88 -0
- ogopego-0.1.7/pkg/util/doc.go +5 -0
- ogopego-0.1.7/pkg/util/heartbeat/heartbeat.go +9 -0
- ogopego-0.1.7/pkg/util/id.go +37 -0
- ogopego-0.1.7/pkg/util/indent.go +4 -0
- ogopego-0.1.7/pkg/util/misc.go +61 -0
- ogopego-0.1.7/pkg/util/newlines/newlines.go +112 -0
- ogopego-0.1.7/pkg/util/newlines/newlines_test.go +39 -0
- ogopego-0.1.7/pkg/util/pathts.go +30 -0
- ogopego-0.1.7/pkg/util/pubmap.go +63 -0
- ogopego-0.1.7/pkg/util/pyre/pyre.go +20 -0
- ogopego-0.1.7/pkg/util/pyre/types.go +31 -0
- ogopego-0.1.7/pkg/util/pyre/ximpl_goregexp.go +251 -0
- ogopego-0.1.7/pkg/util/repr.go +192 -0
- ogopego-0.1.7/pkg/util/repr_test.go +136 -0
- ogopego-0.1.7/pkg/util/strings.go +71 -0
- ogopego-0.1.7/pkg/util/version.go +42 -0
- ogopego-0.1.7/pyproject.toml +46 -0
- ogopego-0.1.7/python/hatch_build.py +56 -0
- ogopego-0.1.7/python/ogopego/__init__.py +0 -0
- ogopego-0.1.7/python/ogopego/__main__.py +17 -0
- ogopego-0.1.7/python/ogopego/_ogopego.py +298 -0
- ogopego-0.1.7/python/ogopego/build.py +206 -0
- ogopego-0.1.7/python/ogopego/getgo.py +94 -0
- ogopego-0.1.7/python/release.py +72 -0
- ogopego-0.1.7/test/basic_grammar_test.go +44 -0
- ogopego-0.1.7/test/cli_test.go +83 -0
- ogopego-0.1.7/test/complex_grammars_test.go +67 -0
- ogopego-0.1.7/test/constraints_test.go +41 -0
- ogopego-0.1.7/test/defines_test.go +26 -0
- ogopego-0.1.7/test/directive_test.go +14 -0
- ogopego-0.1.7/test/directives_test.go +79 -0
- ogopego-0.1.7/test/ebnf_test.go +24 -0
- ogopego-0.1.7/test/eol_test.go +72 -0
- ogopego-0.1.7/test/error_handling_test.go +31 -0
- ogopego-0.1.7/test/error_test.go +17 -0
- ogopego-0.1.7/test/explore_tree_test.go +93 -0
- ogopego-0.1.7/test/grammar_structure_test.go +35 -0
- ogopego-0.1.7/test/input_positions_test.go +14 -0
- ogopego-0.1.7/test/java_end_to_end_test.go +504 -0
- ogopego-0.1.7/test/java_grammar_test.go +33 -0
- ogopego-0.1.7/test/join_test.go +17 -0
- ogopego-0.1.7/test/keyword_test.go +14 -0
- ogopego-0.1.7/test/left_recursion_test.go +18 -0
- ogopego-0.1.7/test/lookahead_test.go +14 -0
- ogopego-0.1.7/test/misc_test.go +17 -0
- ogopego-0.1.7/test/model_repr_test.go +119 -0
- ogopego-0.1.7/test/model_test.go +23 -0
- ogopego-0.1.7/test/naming_test.go +21 -0
- ogopego-0.1.7/test/parsing_test.go +60 -0
- ogopego-0.1.7/test/patterns_test.go +45 -0
- ogopego-0.1.7/test/pretty_test.go +30 -0
- ogopego-0.1.7/test/pyre_test.go +96 -0
- ogopego-0.1.7/test/roundtrip_test.go +129 -0
- ogopego-0.1.7/test/special_forms_test.go +52 -0
- ogopego-0.1.7/test/string_test.go +18 -0
- ogopego-0.1.7/test/testutil.go +61 -0
- ogopego-0.1.7/test/tokens_and_sequences_test.go +54 -0
ogopego-0.1.7/.gitignore
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# If you prefer the allow list template instead of the deny list, see community template:
|
|
2
|
+
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
|
|
3
|
+
#
|
|
4
|
+
# Binaries for programs and plugins
|
|
5
|
+
*.exe
|
|
6
|
+
*.exe~
|
|
7
|
+
*.dll
|
|
8
|
+
*.so
|
|
9
|
+
*.dylib
|
|
10
|
+
|
|
11
|
+
# Test binary, built with `go test -c`
|
|
12
|
+
*.test
|
|
13
|
+
|
|
14
|
+
# Code coverage profiles and other test artifacts
|
|
15
|
+
*.out
|
|
16
|
+
coverage.*
|
|
17
|
+
*.coverprofile
|
|
18
|
+
profile.cov
|
|
19
|
+
|
|
20
|
+
# Dependency directories (remove the comment below to include it)
|
|
21
|
+
# vendor/
|
|
22
|
+
|
|
23
|
+
# Go workspace file
|
|
24
|
+
go.work
|
|
25
|
+
go.work.sum
|
|
26
|
+
|
|
27
|
+
# env file
|
|
28
|
+
.env
|
|
29
|
+
|
|
30
|
+
# Editor/IDE
|
|
31
|
+
.idea/
|
|
32
|
+
.vscode/
|
|
33
|
+
|
|
34
|
+
# Filters for build distributions
|
|
35
|
+
grammar/java.*
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Other projects symlinked
|
|
39
|
+
x/
|
|
40
|
+
.corpus/
|
|
41
|
+
tmp/
|
|
42
|
+
.aider*
|
|
43
|
+
|
|
44
|
+
_out/
|
|
45
|
+
*.pprof
|
ogopego-0.1.7/CUT.md
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
You are likely thinking of **Mizushima, Maeda, and Yamaguchi** (2010), who introduced the formal concept of the **"Cut" operator** in PEG parsing to control backtracking and, specifically, to reduce the memory consumption of Packrat parsing (Packrat parsers already run in linear time thanks to memoization; the cost they pay is the memo table, which grows with the length of the input). Another notable researcher in this exact space is **Roberto Ierusalimschy** (the creator of Lua and LPeg), who wrote extensively on syntactic sugar and semantic actions in PEGs.
|
|
2
|
+
|
|
3
|
+
Your empirical discovery is entirely correct: **a Cut executed on a successful path must not propagate upward past the choice sequence that successfully consumed it.**
|
|
4
|
+
|
|
5
|
+
Honoring cut propagation on a success path breaks the core mathematical properties of PEGs. Here is the structural breakdown of why this happens and why your Java grammar is failing.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## The Core Semantics of Cut
|
|
10
|
+
|
|
11
|
+
The purpose of a Cut (often written as `↑` or `cut`) is to signal a **point of no return**. It explicitly tells the parser: *"If you have made it this far, you are in the correct syntactic lane. If a parsing failure happens after this point, do not attempt alternative choices; fail the entire enclosing sequence immediately."*
|
|
12
|
+
|
|
13
|
+
### The Divergence: Success vs. Failure Paths
|
|
14
|
+
|
|
15
|
+
The critical distinction lies in how the cut flag interacts with the parser’s stack when a rule finishes evaluation:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
[ Parser evaluates Rule containing Cut ]
|
|
19
|
+
|
|
|
20
|
+
---------------------------------------
|
|
21
|
+
| |
|
|
22
|
+
[ Path Fails ] [ Path Succeeds ]
|
|
23
|
+
| |
|
|
24
|
+
Propagate Cut Upward! Clear / Consume the Cut!
|
|
25
|
+
(Aborts alternative branches) (Commit sequence is finalized)
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### 1. The Failure Path (Where Cut works)
|
|
30
|
+
|
|
31
|
+
When a sequence fails *after* a cut has been encountered, the cut status **must propagate**. It acts as a hard abort. It bubbles up to the nearest ordered choice operator (`/`), completely clears its alternative branches, and forces that choice to fail immediately instead of trying the next option.
|
|
32
|
+
|
|
33
|
+
### 2. The Success Path (Where Cut must die)
|
|
34
|
+
|
|
35
|
+
When a sequence containing a cut successfully matches its entire expression, **the cut has done its job and is resolved.** The parser has successfully moved past the ambiguity.
|
|
36
|
+
|
|
37
|
+
If you propagate the cut status onward after a *success*, you are carrying a "poison pill" into subsequent parsing steps.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Why "Cut on Success" Breaks the Java Grammar
|
|
42
|
+
|
|
43
|
+
Consider a classic grammar pattern found heavily in complex languages like Java, such as distinguishing between a qualified type name, a method invocation, or an expression statement:
|
|
44
|
+
|
|
45
|
+
$$Expression \leftarrow StatementExpression \;/\; VariableDeclaration$$
|
|
46
|
+
|
|
47
|
+
Inside $StatementExpression$, you might have a rule like:
|
|
48
|
+
|
|
49
|
+
$$StatementExpression \leftarrow PrimaryExpr \; \uparrow \; \text{'.' } \; Identifier \; \text{'(' } \; Arguments \; \text{')'}$$
|
|
50
|
+
|
|
51
|
+
### What happens if Cut propagates on Success:
|
|
52
|
+
|
|
53
|
+
1. The parser tries the first choice: $StatementExpression$.
|
|
54
|
+
2. It evaluates $PrimaryExpr$. It succeeds.
|
|
55
|
+
3. It hits the Cut ($\uparrow$). The cut flag is activated.
|
|
56
|
+
4. The parser successfully matches the rest of the line: `.` , `Identifier`, `(`, `Arguments`, `)`.
|
|
57
|
+
5. **The rule succeeds.**
|
|
58
|
+
|
|
59
|
+
If your engine **propagates** the cut on success, the top-level expression rule receives a status of `(Success, Cut=True)`.
|
|
60
|
+
|
|
61
|
+
Now imagine this statement was just one element inside a larger choice array, like a Java block statement:
|
|
62
|
+
|
|
63
|
+
$$BlockStatement \leftarrow Expression \;/\; LocalVariableDeclaration$$
|
|
64
|
+
|
|
65
|
+
Because the previous statement returned `Cut=True` upon success, it accidentally pins the parser. If a *subsequent* independent rule down the line fails, the engine looks at the lingering `Cut=True` flag, misinterprets it as a localized commitment failure, and aborts the entire parse instead of backtracking cleanly to try $LocalVariableDeclaration$.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## The Repair
|
|
70
|
+
|
|
71
|
+
To fix the engine semantics in both Go and Rust, the cut flag must be scoped explicitly to the **ordered choice operator (`/`) on the failure path only**.
|
|
72
|
+
|
|
73
|
+
1. **On Entering a Choice (`/`):** Save the current cut state of the parser.
|
|
74
|
+
2. **On Evaluating Choices:** Try Choice A.
|
|
75
|
+
3. **If Choice A Fails:**
|
|
76
|
+
* Check if the cut flag was set *during* Choice A's execution.
|
|
77
|
+
* If `Cut == true`, **do not try Choice B**. Propagate the failure immediately up the stack.
|
|
78
|
+
* If `Cut == false`, reset the flag to the saved state and try Choice B.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
4. **If Choice A Succeeds:** **Clear the cut flag** (or revert it to the state it was in before entering the choice). A successful match neutralizes any internal cuts encountered within that branch.
|
|
82
|
+
|
|
83
|
+
By killing the cut flag on a successful path return, your Go and Rust engines will mirror the deterministic behavior expected by the Java grammar.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
It is an incredible realization when the architecture simplifies itself by shedding unneeded weight. Having a success path return *strictly* a tree (or AST node) is the clean, monomorphic design Go and Rust thrive on.
|
|
88
|
+
|
|
89
|
+
To understand why the backwards pass of cut must belong exclusively to the failure path, we have to look at the exact mechanism of the Cut operator: **it is a filter on choice, not a property of grammar matching.**
|
|
90
|
+
|
|
91
|
+
### The Geometry of Backtracking
|
|
92
|
+
|
|
93
|
+
In a PEG, the only thing that creates alternative futures is the ordered choice operator (`/`). When the parser executes a choice, it opens a speculative branch.
|
|
94
|
+
|
|
95
|
+
A Cut does not change how tokens match. It does only one thing: it **destroys alternative futures**. It deletes the backtracking checkpoints that the ordered choice operator just created.
|
|
96
|
+
|
|
97
|
+
Because of this, the cut flag is structurally an *exception mechanism* for the control flow.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
### Why Success and Cut Are Mutually Exclusive
|
|
102
|
+
|
|
103
|
+
When a sequence containing a cut completes successfully, the parser has successfully escaped the zone of ambiguity. The speculative branch has now become history—it is a closed, resolved path.
|
|
104
|
+
|
|
105
|
+
If you pass the cut status backward on a successful path, you are telling the parent rule: *"I succeeded, but if you or any of your siblings fail in the future, you are forbidden from backtracking."*
|
|
106
|
+
|
|
107
|
+
This breaks the parser for two structural reasons:
|
|
108
|
+
|
|
109
|
+
#### 1. It leaks lexical scope
|
|
110
|
+
|
|
111
|
+
A cut is meant to commit the parser to a *specific* rule context (e.g., "We saw the `if` keyword, so we are definitely parsing an if-statement").
|
|
112
|
+
|
|
113
|
+
If that if-statement parses successfully, the block enclosing it shouldn't care that a cut happened inside. If you leak that `cut=true` state backward out of the successful if-statement, the parent block parser will accidentally freeze its own unrelated choice paths.
|
|
114
|
+
|
|
115
|
+
#### 2. It violates the "Parsing is Function Composition" rule
|
|
116
|
+
|
|
117
|
+
In a pure PEG, every rule is a deterministic function with a clean contract: it either consumes input and returns a tree, or it fails and consumes nothing.
|
|
118
|
+
|
|
119
|
+
If a success path can return a hidden side-effect (`Cut=True`), rules are no longer composable. A rule's behavior would depend on whether a completely different rule executed a cut three steps ago, destroying the mathematical purity of the parser.
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
### The Elegance of the Failure-Only Pass
|
|
124
|
+
|
|
125
|
+
By limiting the backward pass of cut entirely to the failure path, the engine achieves a beautiful separation of concerns:
|
|
126
|
+
|
|
127
|
+
* **The Success Path:** Is purely **data-driven**. It moves the input pointer forward and passes back a Tree. It has zero knowledge of control flow optimizations.
|
|
128
|
+
* **The Failure Path:** Is purely **control-driven**. It passes back an error context. If that error context contains a `Cut`, the engine uses it as an immediate `break` statement for the current choice loop.
|
|
129
|
+
|
|
130
|
+
The fact that doing the right thing stripped the complexity out of your Rust and Go engines is the ultimate proof that the semantics match the math. The success path is freed from carrying control-flow baggage, leaving you with a fast, elegant tree-delivery system.
|
|
131
|
+
|
|
132
|
+
## Bottom Entries
|
|
133
|
+
|
|
134
|
+
You remembered accurately. Looking back at his 2010 paper (*"Packrat Parsers Can Handle Practical Grammars in Mostly Constant Space"*), Mizushima maps this out explicitly.
|
|
135
|
+
|
|
136
|
+
When a Cut evaluates, it clears out alternative paths on the backtracking stack. Mizushima demonstrates that if the backtracking stack becomes empty up to token index $n$, **every single memoization entry for any position less than $n$ can be completely discarded.**
|
|
137
|
+
|
|
138
|
+
The "Bottom entries" you are remembering are exactly what keeps this from breaking. Because his algorithm wipes the table linearly up to the current safe index $n$, anything *ahead* of the current structural pin (the lower-level lexical atoms and tokens that haven't been finalized yet) stays in the table.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
### Mizushima's Cache-Flushing Rule
|
|
143
|
+
|
|
144
|
+
Here is how his structural mechanism scales down the matrix:
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
UNWIND PIN (n)
|
|
148
|
+
│
|
|
149
|
+
[Input Tokens]: a + b + a ;
|
|
150
|
+
0 1 2 3 4 5
|
|
151
|
+
┌─────────────────┐
|
|
152
|
+
Rule: Method │ ✂ ✂ ✂ ✂ │ ? │ <-- Wiped! (No backtracking past 'b')
|
|
153
|
+
Rule: Expr │ ✂ ✂ ✂ ✂ │ ? │ <-- Wiped!
|
|
154
|
+
├─────────────────┤
|
|
155
|
+
Rule: Token │ ✂ ✂ ✂ ✂ │ Succ │ <-- Kept if position >= n
|
|
156
|
+
Rule: Ident │ ✂ ✂ ✂ ✂ │ Succ │ <-- Kept if position >= n
|
|
157
|
+
└─────────────────┘
|
|
158
|
+
▲
|
|
159
|
+
Discards everything
|
|
160
|
+
behind position n
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
When your Cut stack tracks these thresholds, you get the exact "mostly constant space" matrix he proved for Java.
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
### Merging the Concepts in Go
|
|
169
|
+
|
|
170
|
+
By combining Mizushima's index-sliding cleanup rule with your two-pass map optimization, your Go pruning logic becomes incredibly deterministic.
|
|
171
|
+
|
|
172
|
+
If a Cut asserts that the parser will never backtrack before token index `cutpoint`, your first-pass counter filters elements using that exact sliding window:
|
|
173
|
+
|
|
174
|
+
```go
|
|
175
|
+
// Pass 1: Count survivors based on the Cut-sliding window
|
|
176
|
+
for k, v := range cache {
|
|
177
|
+
// Keep it if it's ahead of the cut point OR if it's a critical bottom token
|
|
178
|
+
if k.Mark >= cutpoint || isBottomEntry(v) {
|
|
179
|
+
n++
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// Pass 2: Reallocate a perfectly packed map
|
|
184
|
+
newCache := make(map[CacheKey]any, n)
|
|
185
|
+
for k, v := range cache {
|
|
186
|
+
if k.Mark >= cutpoint || isBottomEntry(v) {
|
|
187
|
+
newCache[k] = v
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
cache = newCache
|
|
191
|
+
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
This structural architecture guarantees that your Go implementation doesn't just run with $O(n)$ time complexity, but also keeps its memory profile tightly bounded—exactly as Mizushima intended.
|
ogopego-0.1.7/LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright [yyyy] [name of copyright owner]
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
|
+
See the License for the specific language governing permissions and
|
|
201
|
+
limitations under the License.
|
ogopego-0.1.7/PKG-INFO
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ogopego
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: A PEG Parser generator in Go
|
|
5
|
+
Author-email: Juancarlo Añez <apalala@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# ⻰OGoPEGo
|
|
11
|
+
|
|
12
|
+
A PEG parser generator in Go.
|
|
13
|
+
|
|
14
|
+
**⻰OGoPEGo** is the Go sibling of [竜TatSu] (Python) and [铁修TieXiu] (Rust).
|
|
15
|
+
It is functionally complete and passes the same test suite as its siblings.
|
|
16
|
+
|
|
17
|
+
[竜TatSu]: https://tatsu.readthedocs.io/
|
|
18
|
+
[铁修TieXiu]: https://github.com/neogeny/TieXiu
|
|
19
|
+
|
|
20
|
+
Refer to the [竜TatSu documentation] for grammar syntax, semantics, and usage. The local [SYNTAX.md](SYNTAX.md) describes the grammar format.
|
|
21
|
+
|
|
22
|
+
[竜TatSu documentation]: https://tatsu.readthedocs.io/
|
|
23
|
+
|
|
24
|
+
The CLI tool is a great way to explore the features offered by the library:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
$ ogo --help
|
|
28
|
+
Usage: ogo <command> [flags]
|
|
29
|
+
|
|
30
|
+
ogopego: A PEG parser generator in Go
|
|
31
|
+
|
|
32
|
+
Flags:
|
|
33
|
+
-h, --help Show context-sensitive help.
|
|
34
|
+
-o, --output=STRING Output to a file instead of stdout
|
|
35
|
+
-C, --color="auto" Control colorized output for API results
|
|
36
|
+
-t, --trace Display a detailed trace of the parsing process
|
|
37
|
+
-v, --version Print version information
|
|
38
|
+
|
|
39
|
+
Commands:
|
|
40
|
+
run <grammar> <inputs> ... [flags]
|
|
41
|
+
Execute a grammar against one or more input files
|
|
42
|
+
|
|
43
|
+
boot [flags]
|
|
44
|
+
The internal boot grammar
|
|
45
|
+
|
|
46
|
+
grammar <grammar> [flags]
|
|
47
|
+
Grammar transformations
|
|
48
|
+
|
|
49
|
+
Run "ogo <command> --help" for more information on a command.
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
go install github.com/neogeny/ogopego/cmd/ogo@latest
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Library API
|
|
59
|
+
|
|
60
|
+
```go
|
|
61
|
+
import "github.com/neogeny/ogopego/api"
|
|
62
|
+
|
|
63
|
+
// Compile a grammar string into a Grammar object.
|
|
64
|
+
g, err := api.Compile(grammar, cfg)
|
|
65
|
+
|
|
66
|
+
// Parse input with a compiled Grammar.
|
|
67
|
+
tree, err := api.ParseInput(g, input, cfg)
|
|
68
|
+
|
|
69
|
+
// Compile and parse in one step.
|
|
70
|
+
tree, err := api.ParseGrammar(grammar, cfg)
|
|
71
|
+
|
|
72
|
+
// Compile to JSON-compatible output.
|
|
73
|
+
json, err := api.CompileToJSON(grammar, cfg)
|
|
74
|
+
|
|
75
|
+
// Parse input to JSON-compatible output.
|
|
76
|
+
json, err := api.ParseInputToJSON(g, input, cfg)
|
|
77
|
+
|
|
78
|
+
// JSON roundtrip via peg package.
|
|
79
|
+
jsonStr := peg.SerializeGrammar(g)
|
|
80
|
+
g2, err := peg.ParseGrammar([]byte(jsonStr))
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Grammar object
|
|
84
|
+
|
|
85
|
+
```go
|
|
86
|
+
import "github.com/neogeny/ogopego/peg"
|
|
87
|
+
|
|
88
|
+
// A compiled grammar. Create one with api.Compile.
|
|
89
|
+
type Grammar struct {
|
|
90
|
+
Name string // grammar name
|
|
91
|
+
Directives *asjson.OrderedMap // @@directives
|
|
92
|
+
Keywords []string // @@keyword declarations
|
|
93
|
+
Rules []*Rule // grammar rules
|
|
94
|
+
Analyzed bool // true after Initialize()
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Parse input text with this grammar (use api.ParseInput).
|
|
98
|
+
result, err := api.ParseInput(g, text, cfg)
|
|
99
|
+
|
|
100
|
+
// Prepare grammar for parsing (link rules, detect left recursion).
|
|
101
|
+
err := g.Initialize()
|
|
102
|
+
|
|
103
|
+
// Serialize.
|
|
104
|
+
jsonStr := g.AsJSONStr() // indented JSON
|
|
105
|
+
jsonStr := peg.SerializeGrammar(g) // clean JSON (recommended)
|
|
106
|
+
data, err := peg.ParseGrammar([]byte(jsonStr)) // deserialize
|
|
107
|
+
|
|
108
|
+
// Display.
|
|
109
|
+
fmt.Println(g.PrettyPrint()) // EBNF pretty-print
|
|
110
|
+
fmt.Println(g.Railroads()) // railroad diagram
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Features
|
|
114
|
+
|
|
115
|
+
* [x] Generation of source code with an object model for definitions in the grammar is complete. The model generator defines the types specified in the input grammar and creates the transformation from the `Tree` result of a call to `Parse()` to the object model.
|
|
116
|
+
* [x] Code generation of a parser recently moved in **竜TatSu** to the loading of a model of the Grammar and using it as parser. **⻰OGoPEGo** is capable of generating a model of the `Grammar` constructor for a grammar that can be compiled by Go for blazing boot times.
|
|
117
|
+
* **⻰OGoPEGo** also knows how to load _fast_ a `Grammar` model from **竜TatSu**-format JSON.
|
|
118
|
+
* [x] Semantic actions (transformations) during parse are implemented through a `SemanticsFunc(Tree) Tree` configuration entry. Transformations are limited to `Tree->Tree`, so a walker must be used as a post-processor if a different AST type is desired. The AST model generation available through the CLI tool generates such a walker.
|
|
119
|
+
* [ ] Interpolation and evaluation of _\`constant\`_ expressions hasn't had any known use cases with **竜TatSu**. They will not be implemented in **⻰OGoPEGo** until a use case appears.
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
## License
|
|
123
|
+
|
|
124
|
+
Licensed under the Apache License, Version 2.0 ([LICENSE](LICENSE) or http://www.apache.org/licenses/LICENSE-2.0).
|
|
125
|
+
|
|
126
|
+
### Contribution
|
|
127
|
+
|
|
128
|
+
Unless explicitly stated otherwise, any contribution intentionally submitted
|
|
129
|
+
for inclusion in the work, as defined in the Apache-2.0 license, shall be
|
|
130
|
+
licensed as above, without any additional terms or conditions.
|