pygbnf 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygbnf-0.2.0/.gitignore +103 -0
- pygbnf-0.2.0/CHANGELOG.md +34 -0
- pygbnf-0.2.0/JSON +0 -0
- pygbnf-0.2.0/LICENSE +21 -0
- pygbnf-0.2.0/PKG-INFO +418 -0
- pygbnf-0.2.0/README.md +394 -0
- pygbnf-0.2.0/examples/arithmetic.py +61 -0
- pygbnf-0.2.0/examples/csv_grammar.py +66 -0
- pygbnf-0.2.0/examples/demo_enum_select.py +74 -0
- pygbnf-0.2.0/examples/demo_schema.py +149 -0
- pygbnf-0.2.0/examples/demo_simple_lang.py +240 -0
- pygbnf-0.2.0/examples/demo_vision.py +171 -0
- pygbnf-0.2.0/examples/img/test.png +0 -0
- pygbnf-0.2.0/examples/json_grammar.py +123 -0
- pygbnf-0.2.0/examples/quickstart.py +32 -0
- pygbnf-0.2.0/examples/simple_lang.py +198 -0
- pygbnf-0.2.0/examples/token_demo.py +25 -0
- pygbnf-0.2.0/pygbnf/__init__.py +165 -0
- pygbnf-0.2.0/pygbnf/combinators.py +91 -0
- pygbnf-0.2.0/pygbnf/gbnf_codegen.py +283 -0
- pygbnf-0.2.0/pygbnf/grammar.py +468 -0
- pygbnf-0.2.0/pygbnf/helpers.py +184 -0
- pygbnf-0.2.0/pygbnf/llm.py +383 -0
- pygbnf-0.2.0/pygbnf/matcher.py +433 -0
- pygbnf-0.2.0/pygbnf/nodes.py +143 -0
- pygbnf-0.2.0/pygbnf/optimizations.py +196 -0
- pygbnf-0.2.0/pygbnf/py.typed +1 -0
- pygbnf-0.2.0/pygbnf/schema.py +560 -0
- pygbnf-0.2.0/pygbnf/tokens.py +59 -0
- pygbnf-0.2.0/pygbnf/toolkit.py +199 -0
- pygbnf-0.2.0/pyproject.toml +40 -0
- pygbnf-0.2.0/tests/__init__.py +0 -0
- pygbnf-0.2.0/tests/test_full.py +1136 -0
- pygbnf-0.2.0/tests/test_matcher.py +221 -0
- pygbnf-0.2.0/tests/test_pygbnf.py +330 -0
pygbnf-0.2.0/.gitignore
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# macOS
|
|
10
|
+
.DS_Store
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
*.manifest
|
|
34
|
+
*.spec
|
|
35
|
+
|
|
36
|
+
# Installer logs
|
|
37
|
+
pip-log.txt
|
|
38
|
+
pip-delete-this-directory.txt
|
|
39
|
+
|
|
40
|
+
# Unit test / coverage
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
.coverage
|
|
45
|
+
.coverage.*
|
|
46
|
+
.cache
|
|
47
|
+
nosetests.xml
|
|
48
|
+
coverage.xml
|
|
49
|
+
*.cover
|
|
50
|
+
*.py,cover
|
|
51
|
+
.hypothesis/
|
|
52
|
+
.pytest_cache/
|
|
53
|
+
|
|
54
|
+
# Type checkers
|
|
55
|
+
.mypy_cache/
|
|
56
|
+
.pyre/
|
|
57
|
+
.pytype/
|
|
58
|
+
|
|
59
|
+
# Jupyter Notebook
|
|
60
|
+
.ipynb_checkpoints
|
|
61
|
+
|
|
62
|
+
# Environments
|
|
63
|
+
.env
|
|
64
|
+
.env.*
|
|
65
|
+
.venv
|
|
66
|
+
venv/
|
|
67
|
+
env/
|
|
68
|
+
ENV/
|
|
69
|
+
env.bak/
|
|
70
|
+
venv.bak/
|
|
71
|
+
|
|
72
|
+
# IDEs / editors
|
|
73
|
+
.vscode/
|
|
74
|
+
.idea/
|
|
75
|
+
*.swp
|
|
76
|
+
*.swo
|
|
77
|
+
*~
|
|
78
|
+
|
|
79
|
+
# Documentation
|
|
80
|
+
docs/_build/
|
|
81
|
+
|
|
82
|
+
# mkdocs
|
|
83
|
+
/site
|
|
84
|
+
|
|
85
|
+
# PyCharm
|
|
86
|
+
.idea/
|
|
87
|
+
|
|
88
|
+
# Ruff / linters
|
|
89
|
+
.ruff_cache/
|
|
90
|
+
|
|
91
|
+
# Poetry
|
|
92
|
+
poetry.lock
|
|
93
|
+
|
|
94
|
+
# PDM
|
|
95
|
+
.pdm.toml
|
|
96
|
+
.pdm-python
|
|
97
|
+
.pdm-build/
|
|
98
|
+
|
|
99
|
+
# UV
|
|
100
|
+
.uv/
|
|
101
|
+
|
|
102
|
+
# Local config
|
|
103
|
+
*.local
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [0.2.0] — 2026-03-06
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- `schema` module — auto-generate grammars from Python types, dataclasses, and function signatures
|
|
9
|
+
- `grammar_from_type()`, `grammar_from_function()`, `grammar_from_args()`
|
|
10
|
+
- `Grammar.from_type()`, `Grammar.from_function_return()`, `Grammar.from_function_args()` for composition
|
|
11
|
+
- Optional dataclass fields (fields with `default` / `default_factory`) are properly omissible
|
|
12
|
+
- `WS()`, `ws_required()` whitespace helpers
|
|
13
|
+
- `float_number()` helper
|
|
14
|
+
- `spaced_comma_list()` helper
|
|
15
|
+
- Bounded `repeat(item, min, max)` combinator
|
|
16
|
+
- `token()`, `token_id()`, `not_token()`, `not_token_id()` for llama.cpp token-level constraints
|
|
17
|
+
- Left-recursion detection with `Grammar.detect_left_recursion()`
|
|
18
|
+
- Dependency graph with `Grammar.dependency_graph()`
|
|
19
|
+
- Pretty-print with `Grammar.pretty_print()`
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
- Character class dash rendering (`[+-]` instead of `[+\-]`)
|
|
23
|
+
- Multi-line alternative formatting (trailing `|`)
|
|
24
|
+
- Root rule duplication when start name equals `"root"`
|
|
25
|
+
|
|
26
|
+
## [0.1.0] — 2026-03-01
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
- Initial release
|
|
30
|
+
- Core AST nodes: `Literal`, `CharacterClass`, `Sequence`, `Alternative`, `Repeat`, `RuleReference`, `TokenReference`, `Group`, `Optional_`
|
|
31
|
+
- DSL combinators: `select()`, `one_or_more()`, `zero_or_more()`, `optional()`, `group()`
|
|
32
|
+
- Grammar container with `@g.rule` decorator and named rules
|
|
33
|
+
- GBNF code generation with 5 optimization passes
|
|
34
|
+
- Prebuilt helpers: `identifier()`, `number()`, `string_literal()`, `comma_list()`, `between()`, `separated_by()`, `keyword()`
|
pygbnf-0.2.0/JSON
ADDED
|
File without changes
|
pygbnf-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 al
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pygbnf-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pygbnf
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A composable Python DSL for building GBNF grammars compatible with llama.cpp
|
|
5
|
+
Project-URL: Homepage, https://github.com/al/pygbnf
|
|
6
|
+
Project-URL: Repository, https://github.com/al/pygbnf
|
|
7
|
+
Project-URL: Issues, https://github.com/al/pygbnf/issues
|
|
8
|
+
Author: alban perli
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: constrained-generation,dsl,gbnf,grammar,llama-cpp,llm
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# pygbnf
|
|
26
|
+
|
|
27
|
+
A composable Python DSL for building **GBNF grammars** compatible with [llama.cpp](https://github.com/ggml-org/llama.cpp).
|
|
28
|
+
|
|
29
|
+
1) Define [context-free grammars](https://en.wikipedia.org/wiki/Context-free_grammar) using expressive Python functions,
|
|
30
|
+
|
|
31
|
+
2) Compile them into valid `G`[BNF](https://en.wikipedia.org/wiki/Backus–Naur_form) strings for constrained LLM generation.
|
|
32
|
+
|
|
33
|
+
3) Match rules in real time during inference.
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from pygbnf import Grammar, GrammarLLM, select
|
|
39
|
+
|
|
40
|
+
g = Grammar()
|
|
41
|
+
|
|
42
|
+
@g.rule
|
|
43
|
+
def answer():
|
|
44
|
+
    return select(["yes", "no", "maybe"])
|
|
45
|
+
|
|
46
|
+
g.start("answer")
|
|
47
|
+
|
|
48
|
+
llm = GrammarLLM("http://localhost:8080/v1")
|
|
49
|
+
|
|
50
|
+
for token, _ in llm.stream(
|
|
51
|
+
    messages=[{"role": "user", "content": "Is the sky blue?"}],
|
|
52
|
+
    grammar=g,
|
|
53
|
+
):
|
|
54
|
+
    print(token, end="", flush=True)
|
|
55
|
+
print()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
The grammar constrains the LLM output — it can only produce `yes`, `no`, or `maybe`.
|
|
59
|
+
|
|
60
|
+
## Guidance-Style GBNF
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
import pygbnf as cfg
|
|
64
|
+
from pygbnf import select, one_or_more, zero_or_more
|
|
65
|
+
|
|
66
|
+
g = cfg.Grammar()
|
|
67
|
+
|
|
68
|
+
@g.rule
|
|
69
|
+
def number():
|
|
70
|
+
n = one_or_more(select("0123456789"))
|
|
71
|
+
return select(['-' + n, n])
|
|
72
|
+
|
|
73
|
+
@g.rule
|
|
74
|
+
def operator():
|
|
75
|
+
return select(['+', '*', '**', '/', '-'])
|
|
76
|
+
|
|
77
|
+
@g.rule
|
|
78
|
+
def expression():
|
|
79
|
+
return select([
|
|
80
|
+
number(),
|
|
81
|
+
expression() + zero_or_more(" ") + operator()
|
|
82
|
+
+ zero_or_more(" ") + expression(),
|
|
83
|
+
"(" + expression() + ")"
|
|
84
|
+
])
|
|
85
|
+
|
|
86
|
+
g.start("expression")
|
|
87
|
+
print(g.to_gbnf())
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Output:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
root ::= expression
|
|
94
|
+
|
|
95
|
+
number ::= "-" [0123456789]+ | [0123456789]+
|
|
96
|
+
operator ::= "+" | "*" | "**" | "/" | "-"
|
|
97
|
+
expression ::=
|
|
98
|
+
number
|
|
99
|
+
| expression " "* operator " "* expression
|
|
100
|
+
| "(" expression ")"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Installation
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install pygbnf
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Or install from source:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
git clone https://github.com/al/pygbnf.git
|
|
113
|
+
cd pygbnf
|
|
114
|
+
pip install -e .
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## LLM Usage
|
|
118
|
+
|
|
119
|
+
pygbnf includes `GrammarLLM`, a thin wrapper around any OpenAI-compatible endpoint (llama.cpp, vLLM, Ollama…) that injects the GBNF grammar automatically.
|
|
120
|
+
|
|
121
|
+
### Streaming with rule matching
|
|
122
|
+
|
|
123
|
+
Enable `match=True` (or pass `only`/`exclude`) to get real-time `RuleEvent`s as the LLM generates tokens:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from pygbnf import Grammar, GrammarLLM, select, one_or_more
|
|
127
|
+
|
|
128
|
+
g = Grammar()
|
|
129
|
+
|
|
130
|
+
@g.rule
|
|
131
|
+
def name():
|
|
132
|
+
"""A person's name."""
|
|
133
|
+
return one_or_more(select("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "))
|
|
134
|
+
|
|
135
|
+
@g.rule
|
|
136
|
+
def greeting():
|
|
137
|
+
"""A greeting message."""
|
|
138
|
+
return select(["hello", "hi", "hey"]) + " " + name()
|
|
139
|
+
|
|
140
|
+
g.start("greeting")
|
|
141
|
+
|
|
142
|
+
llm = GrammarLLM("http://localhost:8080/v1")
|
|
143
|
+
|
|
144
|
+
for token, events in llm.stream(
|
|
145
|
+
messages=[{"role": "user", "content": "Greet Alice."}],
|
|
146
|
+
grammar=g,
|
|
147
|
+
match=True,
|
|
148
|
+
):
|
|
149
|
+
print(token, end="", flush=True)
|
|
150
|
+
if events:
|
|
151
|
+
for ev in events:
|
|
152
|
+
print(f"\n ← [{ev.rule}] {ev.text!r} (doc: {ev.doc})")
|
|
153
|
+
print()
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Each `RuleEvent` carries:
|
|
157
|
+
- `rule` — the matched rule name
|
|
158
|
+
- `text` — the matched text
|
|
159
|
+
- `fn` — the original Python function
|
|
160
|
+
- `doc` — the function's docstring
|
|
161
|
+
|
|
162
|
+
### Non-streaming completion
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
text, events = llm.complete(
|
|
166
|
+
messages=[{"role": "user", "content": "Is the sky blue?"}],
|
|
167
|
+
grammar=g,
|
|
168
|
+
match=True,
|
|
169
|
+
)
|
|
170
|
+
print(text)
|
|
171
|
+
for ev in events:
|
|
172
|
+
print(f" [{ev.rule}] {ev.text!r}")
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Schema-based grammar with LLM
|
|
176
|
+
|
|
177
|
+
Combine `grammar_from_type` with `GrammarLLM` to constrain output to a JSON schema:
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
from dataclasses import dataclass
|
|
181
|
+
from pygbnf import grammar_from_type, GrammarLLM
|
|
182
|
+
|
|
183
|
+
@dataclass
|
|
184
|
+
class City:
|
|
185
|
+
name: str
|
|
186
|
+
country: str
|
|
187
|
+
population: int
|
|
188
|
+
|
|
189
|
+
g = grammar_from_type(City)
|
|
190
|
+
llm = GrammarLLM("http://localhost:8080/v1")
|
|
191
|
+
|
|
192
|
+
text, _ = llm.complete(
|
|
193
|
+
messages=[{"role": "user", "content": "Describe Tokyo in JSON."}],
|
|
194
|
+
grammar=g,
|
|
195
|
+
)
|
|
196
|
+
print(text)
|
|
197
|
+
# → {"name": "Tokyo", "country": "Japan", "population": 13960000}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Tool calling with Toolkit
|
|
201
|
+
|
|
202
|
+
`Toolkit` is a decorator-based tool registry. Register functions with `@toolkit.tool`, then pass the toolkit to `llm.stream()` or `llm.complete()` — the grammar and system prompt are injected automatically.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
import enum
|
|
206
|
+
from pygbnf import GrammarLLM, Toolkit
|
|
207
|
+
|
|
208
|
+
toolkit = Toolkit()
|
|
209
|
+
|
|
210
|
+
class Units(enum.Enum):
|
|
211
|
+
CELSIUS = "celsius"
|
|
212
|
+
FAHRENHEIT = "fahrenheit"
|
|
213
|
+
|
|
214
|
+
@toolkit.tool
|
|
215
|
+
def get_weather(city: str, units: Units = Units.CELSIUS) -> str:
|
|
216
|
+
"""Get current weather for a city."""
|
|
217
|
+
return f"22° {units.value} in {city}"
|
|
218
|
+
|
|
219
|
+
@toolkit.tool
|
|
220
|
+
def search_web(query: str, max_results: int = 5) -> str:
|
|
221
|
+
"""Search the web."""
|
|
222
|
+
return f"Found {max_results} results for {query!r}"
|
|
223
|
+
|
|
224
|
+
llm = GrammarLLM("http://localhost:8080/v1")
|
|
225
|
+
|
|
226
|
+
# Stream with toolkit — grammar + system prompt auto-injected
|
|
227
|
+
result = ""
|
|
228
|
+
for token, _ in llm.stream(
|
|
229
|
+
messages=[{"role": "user", "content": "Weather in Tokyo?"}],
|
|
230
|
+
toolkit=toolkit,
|
|
231
|
+
):
|
|
232
|
+
print(token, end="", flush=True)
|
|
233
|
+
result += token
|
|
234
|
+
|
|
235
|
+
# Dispatch the JSON result to the matching function
|
|
236
|
+
output = toolkit.dispatch(result)
|
|
237
|
+
print(output) # → "22° celsius in Tokyo"
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
The toolkit:
|
|
241
|
+
- **Builds a GBNF grammar** constraining the LLM to produce `{"function": "...", "arguments": {...}}` with only registered tool names and typed arguments
|
|
242
|
+
- **Generates a system prompt** listing available tools with signatures and docstrings
|
|
243
|
+
- **Dispatches** the parsed JSON to the right function, converting enum strings back to Python `Enum` instances automatically
|
|
244
|
+
|
|
245
|
+
You can also use `llm.tool_call()` as a one-liner that streams + dispatches:
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
output = llm.tool_call(toolkit, "Weather in Tokyo?")
|
|
249
|
+
print(output) # → "22° celsius in Tokyo"
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
> **Note:** `GrammarLLM` requires the `openai` package: `pip install openai`.
|
|
253
|
+
> The LLM server must support the `grammar` field in its API (llama.cpp supports it natively).
|
|
254
|
+
|
|
255
|
+
## Architecture
|
|
256
|
+
|
|
257
|
+
### AST Nodes
|
|
258
|
+
|
|
259
|
+
Every grammar construct is a frozen dataclass node. Nodes compose via `+` (sequence) and `|` (alternative):
|
|
260
|
+
|
|
261
|
+
| Node | Description | GBNF |
|
|
262
|
+
|------|------------|------|
|
|
263
|
+
| `Literal` | Double-quoted string | `"hello"` |
|
|
264
|
+
| `CharacterClass` | Character class | `[0-9]` |
|
|
265
|
+
| `Sequence` | Ordered concatenation | `a b c` |
|
|
266
|
+
| `Alternative` | Choice between options | `a \| b \| c` |
|
|
267
|
+
| `Repeat` | Quantified repetition | `x+`, `x*`, `x?`, `x{2,5}` |
|
|
268
|
+
| `RuleReference` | Reference to named rule | `expression` |
|
|
269
|
+
| `TokenReference` | Token-level constraint | `<think>`, `<[1000]>` |
|
|
270
|
+
| `Group` | Parenthesised group | `(a b)` |
|
|
271
|
+
| `Optional_` | Optional element | `x?` |
|
|
272
|
+
|
|
273
|
+
### DSL Combinators
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
from pygbnf import select, one_or_more, zero_or_more, optional, repeat, group
|
|
277
|
+
|
|
278
|
+
# Character class from string
|
|
279
|
+
select("0123456789") # → [0123456789]
|
|
280
|
+
|
|
281
|
+
# Alternative from list
|
|
282
|
+
select(["+", "-", "*"]) # → "+" | "-" | "*"
|
|
283
|
+
|
|
284
|
+
# Repetition
|
|
285
|
+
one_or_more(x) # → x+
|
|
286
|
+
zero_or_more(x) # → x*
|
|
287
|
+
optional(x) # → x?
|
|
288
|
+
repeat(x, 2, 5) # → x{2,5}
|
|
289
|
+
|
|
290
|
+
# Grouping
|
|
291
|
+
group(a + b) # → (a b)
|
|
292
|
+
|
|
293
|
+
# Operators
|
|
294
|
+
a + b # → a b (sequence)
|
|
295
|
+
a | b # → a | b (alternative)
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
### Rule Definition
|
|
299
|
+
|
|
300
|
+
Rules are defined with the `@g.rule` decorator. Calling a rule function inside another rule creates a **rule reference** (not an inline expansion):
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
g = cfg.Grammar()
|
|
304
|
+
|
|
305
|
+
@g.rule
|
|
306
|
+
def digit():
|
|
307
|
+
return select("0123456789")
|
|
308
|
+
|
|
309
|
+
@g.rule
|
|
310
|
+
def number():
|
|
311
|
+
return one_or_more(digit()) # → digit+ (reference, not inlined)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Forward references work naturally — rules can reference rules defined later.
|
|
315
|
+
|
|
316
|
+
### Token Constraints
|
|
317
|
+
|
|
318
|
+
llama.cpp supports token-level matching:
|
|
319
|
+
|
|
320
|
+
```python
|
|
321
|
+
from pygbnf import token, token_id, not_token, not_token_id
|
|
322
|
+
|
|
323
|
+
token("think") # → <think>
|
|
324
|
+
token_id(1000) # → <[1000]>
|
|
325
|
+
not_token("think") # → !<think>
|
|
326
|
+
not_token_id(1001) # → !<[1001]>
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
### Grammar Helpers
|
|
330
|
+
|
|
331
|
+
Common patterns prebuilt:
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
from pygbnf import (
|
|
335
|
+
WS, ws, ws_required, # whitespace
|
|
336
|
+
keyword, identifier, number, # basic tokens
|
|
337
|
+
float_number, string_literal, # complex tokens
|
|
338
|
+
comma_list, between, # structural patterns
|
|
339
|
+
separated_by, spaced_comma_list,
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
comma_list(identifier()) # → ident ("," " "* ident)*
|
|
343
|
+
between("(", expr, ")") # → "(" expr ")"
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
## Recursion Analysis
|
|
347
|
+
|
|
348
|
+
Detect left recursion in your grammar:
|
|
349
|
+
|
|
350
|
+
```python
|
|
351
|
+
cycles = g.detect_left_recursion()
|
|
352
|
+
# Warns: "Left recursion detected: expression -> expression"
|
|
353
|
+
# Suggests: rewrite as base (op base)*
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
## Examples
|
|
357
|
+
|
|
358
|
+
See the `examples/` directory:
|
|
359
|
+
|
|
360
|
+
| File | Description |
|
|
361
|
+
|------|------------|
|
|
362
|
+
| `quickstart.py` | The quick-start example from this README |
|
|
363
|
+
| `arithmetic.py` | Arithmetic expressions with operator precedence |
|
|
364
|
+
| `csv_grammar.py` | CSV file format |
|
|
365
|
+
| `json_grammar.py` | Full JSON grammar |
|
|
366
|
+
| `simple_lang.py` | A small programming language |
|
|
367
|
+
| `token_demo.py` | Token-level constraints |
|
|
368
|
+
| `demo_llm.py` | LLM constrained generation (requires llama-server) |
|
|
369
|
+
| `demo_schema.py` | Schema → grammar examples |
|
|
370
|
+
| `demo_hybrid.py` | DSL + Python types mixed |
|
|
371
|
+
| `demo_simple_lang.py` | Mini-language generation with LLM |
|
|
372
|
+
| `demo_vision.py` | Vision + grammar: solve math from an image |
|
|
373
|
+
|
|
374
|
+
Run any example:
|
|
375
|
+
|
|
376
|
+
```bash
|
|
377
|
+
python examples/arithmetic.py
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Schema Generation
|
|
381
|
+
|
|
382
|
+
Auto-generate grammars from Python types and dataclasses:
|
|
383
|
+
|
|
384
|
+
```python
|
|
385
|
+
from dataclasses import dataclass
|
|
386
|
+
from pygbnf import grammar_from_type
|
|
387
|
+
|
|
388
|
+
@dataclass
|
|
389
|
+
class Movie:
|
|
390
|
+
title: str
|
|
391
|
+
year: int
|
|
392
|
+
rating: float
|
|
393
|
+
|
|
394
|
+
g = grammar_from_type(Movie)
|
|
395
|
+
print(g.to_gbnf())
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
Also supports function signatures:
|
|
399
|
+
|
|
400
|
+
```python
|
|
401
|
+
from pygbnf import grammar_from_args
|
|
402
|
+
|
|
403
|
+
def search(query: str, limit: int = 10):
|
|
404
|
+
...
|
|
405
|
+
|
|
406
|
+
g = grammar_from_args(search)
|
|
407
|
+
print(g.to_gbnf())
|
|
408
|
+
```
|
|
409
|
+
|
|
410
|
+
## Requirements
|
|
411
|
+
|
|
412
|
+
- Python 3.8+
|
|
413
|
+
- No external dependencies for the core library (`GrammarLLM` additionally requires the `openai` package)
|
|
414
|
+
|
|
415
|
+
## Acknowledgements
|
|
416
|
+
|
|
417
|
+
- [guidance-ai](https://github.com/guidance-ai/guidance) — pygbnf's composable API is inspired by their approach to constrained generation
|
|
418
|
+
- [llama.cpp](https://github.com/ggml-org/llama.cpp) — for the GBNF format and the underlying inference engine
|