gcf-python 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gcf_python-0.1.0/.github/workflows/ci.yml +21 -0
- gcf_python-0.1.0/.github/workflows/publish.yml +24 -0
- gcf_python-0.1.0/.gitignore +10 -0
- gcf_python-0.1.0/LICENSE +21 -0
- gcf_python-0.1.0/PKG-INFO +197 -0
- gcf_python-0.1.0/README.md +172 -0
- gcf_python-0.1.0/pyproject.toml +42 -0
- gcf_python-0.1.0/src/gcf/__init__.py +60 -0
- gcf_python-0.1.0/src/gcf/cli.py +155 -0
- gcf_python-0.1.0/src/gcf/constants.py +24 -0
- gcf_python-0.1.0/src/gcf/decode.py +181 -0
- gcf_python-0.1.0/src/gcf/delta.py +54 -0
- gcf_python-0.1.0/src/gcf/encode.py +86 -0
- gcf_python-0.1.0/src/gcf/session.py +137 -0
- gcf_python-0.1.0/src/gcf/types.py +63 -0
- gcf_python-0.1.0/tests/__init__.py +0 -0
- gcf_python-0.1.0/tests/test_decode.py +191 -0
- gcf_python-0.1.0/tests/test_delta.py +149 -0
- gcf_python-0.1.0/tests/test_encode.py +187 -0
- gcf_python-0.1.0/tests/test_roundtrip.py +135 -0
- gcf_python-0.1.0/tests/test_session.py +142 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: ${{ matrix.python-version }}
|
|
20
|
+
- run: pip install -e ".[dev]" || pip install -e . && pip install pytest
|
|
21
|
+
- run: pytest tests/ -v
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ['v*']
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
id-token: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: '3.12'
|
|
18
|
+
- run: pip install build twine
|
|
19
|
+
- run: pip install -e . && pip install pytest && pytest tests/ -v
|
|
20
|
+
- run: python -m build
|
|
21
|
+
- run: twine upload --verbose dist/*
|
|
22
|
+
env:
|
|
23
|
+
TWINE_USERNAME: __token__
|
|
24
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
gcf_python-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Blackwell Systems
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gcf-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
|
|
5
|
+
Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
|
|
6
|
+
Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
|
|
7
|
+
Project-URL: Specification, https://github.com/blackwell-systems/gcf
|
|
8
|
+
Author: Blackwell Systems
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: gcf,graph,llm,mcp,token-efficient,wire-format
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
<p align="center">
|
|
27
|
+
<a href="https://github.com/blackwell-systems"><img src="https://raw.githubusercontent.com/blackwell-systems/blackwell-docs-theme/main/badge-trademark.svg" alt="Blackwell Systems"></a>
|
|
28
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
|
|
29
|
+
</p>
|
|
30
|
+
|
|
31
|
+
# gcf-python
|
|
32
|
+
|
|
33
|
+
Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwell-systems/gcf).
|
|
34
|
+
|
|
35
|
+
**84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy at 500 symbols, where JSON fails.**
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
pip install gcf-python
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
|
|
44
|
+
|
|
45
|
+
## CLI
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
gcf encode < payload.json # JSON to GCF
|
|
49
|
+
gcf decode < payload.gcf # GCF to JSON
|
|
50
|
+
gcf stats < payload.json # token comparison with visual bar
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
Payload: 50 symbols, 20 edges
|
|
55
|
+
|
|
56
|
+
JSON ██████████████████████████████ 4,200 tokens
|
|
57
|
+
GCF ████████░░░░░░░░░░░░░░░░░░░░░░ 1,150 tokens
|
|
58
|
+
|
|
59
|
+
Savings: 73% fewer tokens with GCF
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Library
|
|
63
|
+
|
|
64
|
+
### Quick Start
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from gcf import encode, Payload, Symbol, Edge
|
|
68
|
+
|
|
69
|
+
p = Payload(
|
|
70
|
+
tool="context_for_task",
|
|
71
|
+
token_budget=5000,
|
|
72
|
+
tokens_used=1847,
|
|
73
|
+
symbols=[
|
|
74
|
+
Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
|
|
75
|
+
Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
|
|
76
|
+
],
|
|
77
|
+
edges=[
|
|
78
|
+
Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
|
|
79
|
+
],
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
output = encode(p)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Output:
|
|
86
|
+
```
|
|
87
|
+
GCF tool=context_for_task budget=5000 tokens=1847 symbols=2
|
|
88
|
+
## targets
|
|
89
|
+
@0 fn pkg.AuthMiddleware 0.78 lsp_resolved
|
|
90
|
+
## related
|
|
91
|
+
@1 fn pkg.NewServer 0.54 lsp_resolved
|
|
92
|
+
## edges
|
|
93
|
+
@0<@1 calls
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Decode
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from gcf import decode
|
|
100
|
+
|
|
101
|
+
p = decode(input_text)
|
|
102
|
+
print(p.tool, len(p.symbols), "symbols", len(p.edges), "edges")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Session Deduplication
|
|
106
|
+
|
|
107
|
+
Track transmitted symbols across multiple tool responses. Previously sent symbols become bare references instead of full declarations:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from gcf import encode_with_session, Session, Payload, Symbol
|
|
111
|
+
|
|
112
|
+
sess = Session()
|
|
113
|
+
|
|
114
|
+
out1 = encode_with_session(payload1, sess) # full declarations
|
|
115
|
+
out2 = encode_with_session(payload2, sess) # reused symbols as "@N # previously transmitted"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
By the 5th call in a session: 92.7% token savings vs JSON.
|
|
119
|
+
|
|
120
|
+
## Delta Encoding
|
|
121
|
+
|
|
122
|
+
When the consumer already has a prior context pack, send only what changed:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from gcf import encode_delta, DeltaPayload, Symbol, Edge
|
|
126
|
+
|
|
127
|
+
delta = DeltaPayload(
|
|
128
|
+
tool="context_for_task",
|
|
129
|
+
base_root="aaa111",
|
|
130
|
+
new_root="bbb222",
|
|
131
|
+
removed=[Symbol(qualified_name="pkg.OldFunc", kind="function")],
|
|
132
|
+
added=[Symbol(qualified_name="pkg.NewFunc", kind="function", score=0.85, provenance="rwr")],
|
|
133
|
+
delta_tokens=30,
|
|
134
|
+
full_tokens=200,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
output = encode_delta(delta)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
81.2% savings on re-queries where the pack changed slightly.
|
|
141
|
+
|
|
142
|
+
## API
|
|
143
|
+
|
|
144
|
+
| Function | Description |
|
|
145
|
+
|----------|-------------|
|
|
146
|
+
| `encode(p: Payload) -> str` | Encode a payload to GCF text |
|
|
147
|
+
| `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
|
|
148
|
+
| `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
|
|
149
|
+
| `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
|
|
150
|
+
| `Session()` | Create a new session tracker (thread-safe) |
|
|
151
|
+
|
|
152
|
+
## Types
|
|
153
|
+
|
|
154
|
+
| Type | Purpose |
|
|
155
|
+
|------|---------|
|
|
156
|
+
| `Payload` | Full GCF payload: tool, budget, symbols, edges, pack root |
|
|
157
|
+
| `Symbol` | Graph node: qualified name, kind, score, provenance, distance |
|
|
158
|
+
| `Edge` | Directed relationship: source, target, edge type |
|
|
159
|
+
| `DeltaPayload` | Diff between two packs: added/removed symbols and edges |
|
|
160
|
+
| `Session` | Thread-safe tracker for multi-call deduplication |
|
|
161
|
+
| `KIND_ABBREV` / `KIND_EXPAND` | Bidirectional kind abbreviation dicts |
|
|
162
|
+
|
|
163
|
+
## Comprehension Eval
|
|
164
|
+
|
|
165
|
+
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
|
|
166
|
+
|
|
167
|
+
| Format | Accuracy | Tokens | vs JSON |
|
|
168
|
+
|--------|----------|--------|---------|
|
|
169
|
+
| **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
|
|
170
|
+
| TOON | 100% (6/6) | 16,378 | 69% fewer |
|
|
171
|
+
| JSON | 66.7% (4/6) | 53,341 | baseline |
|
|
172
|
+
|
|
173
|
+
JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy. GCF does it in 32% fewer tokens.
|
|
174
|
+
|
|
175
|
+
## Token Efficiency (TOON's Own Benchmark)
|
|
176
|
+
|
|
177
|
+
Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tree/gcf-comparison) with GCF inserted (their datasets, their tokenizer):
|
|
178
|
+
|
|
179
|
+
| Track | GCF | TOON | Result |
|
|
180
|
+
|-------|-----|------|--------|
|
|
181
|
+
| Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 26% smaller** |
|
|
182
|
+
| Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
|
|
183
|
+
| Semi-uniform event logs | 107,269 | 154,032 | **GCF 30% smaller** |
|
|
184
|
+
|
|
185
|
+
GCF wins on every dataset except deeply nested config (75 tokens on a 618-token payload). On semi-uniform data, GCF uses 30% fewer tokens than TOON.
|
|
186
|
+
|
|
187
|
+
Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
|
|
188
|
+
|
|
189
|
+
## Other Implementations
|
|
190
|
+
|
|
191
|
+
- **Go**: [github.com/blackwell-systems/gcf-go](https://github.com/blackwell-systems/gcf-go)
|
|
192
|
+
- **TypeScript**: [github.com/blackwell-systems/gcf-typescript](https://github.com/blackwell-systems/gcf-typescript)
|
|
193
|
+
- **Specification**: [github.com/blackwell-systems/gcf](https://github.com/blackwell-systems/gcf)
|
|
194
|
+
|
|
195
|
+
## License
|
|
196
|
+
|
|
197
|
+
MIT
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<a href="https://github.com/blackwell-systems"><img src="https://raw.githubusercontent.com/blackwell-systems/blackwell-docs-theme/main/badge-trademark.svg" alt="Blackwell Systems"></a>
|
|
3
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
|
|
4
|
+
</p>
|
|
5
|
+
|
|
6
|
+
# gcf-python
|
|
7
|
+
|
|
8
|
+
Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwell-systems/gcf).
|
|
9
|
+
|
|
10
|
+
**84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy at 500 symbols, where JSON fails.**
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
pip install gcf-python
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
|
|
19
|
+
|
|
20
|
+
## CLI
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
gcf encode < payload.json # JSON to GCF
|
|
24
|
+
gcf decode < payload.gcf # GCF to JSON
|
|
25
|
+
gcf stats < payload.json # token comparison with visual bar
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
Payload: 50 symbols, 20 edges
|
|
30
|
+
|
|
31
|
+
JSON ██████████████████████████████ 4,200 tokens
|
|
32
|
+
GCF ████████░░░░░░░░░░░░░░░░░░░░░░ 1,150 tokens
|
|
33
|
+
|
|
34
|
+
Savings: 73% fewer tokens with GCF
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Library
|
|
38
|
+
|
|
39
|
+
### Quick Start
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from gcf import encode, Payload, Symbol, Edge
|
|
43
|
+
|
|
44
|
+
p = Payload(
|
|
45
|
+
tool="context_for_task",
|
|
46
|
+
token_budget=5000,
|
|
47
|
+
tokens_used=1847,
|
|
48
|
+
symbols=[
|
|
49
|
+
Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
|
|
50
|
+
Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
|
|
51
|
+
],
|
|
52
|
+
edges=[
|
|
53
|
+
Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
|
|
54
|
+
],
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
output = encode(p)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Output:
|
|
61
|
+
```
|
|
62
|
+
GCF tool=context_for_task budget=5000 tokens=1847 symbols=2
|
|
63
|
+
## targets
|
|
64
|
+
@0 fn pkg.AuthMiddleware 0.78 lsp_resolved
|
|
65
|
+
## related
|
|
66
|
+
@1 fn pkg.NewServer 0.54 lsp_resolved
|
|
67
|
+
## edges
|
|
68
|
+
@0<@1 calls
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Decode
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from gcf import decode
|
|
75
|
+
|
|
76
|
+
p = decode(input_text)
|
|
77
|
+
print(p.tool, len(p.symbols), "symbols", len(p.edges), "edges")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Session Deduplication
|
|
81
|
+
|
|
82
|
+
Track transmitted symbols across multiple tool responses. Previously sent symbols become bare references instead of full declarations:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from gcf import encode_with_session, Session, Payload, Symbol
|
|
86
|
+
|
|
87
|
+
sess = Session()
|
|
88
|
+
|
|
89
|
+
out1 = encode_with_session(payload1, sess) # full declarations
|
|
90
|
+
out2 = encode_with_session(payload2, sess) # reused symbols as "@N # previously transmitted"
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
By the 5th call in a session: 92.7% token savings vs JSON.
|
|
94
|
+
|
|
95
|
+
## Delta Encoding
|
|
96
|
+
|
|
97
|
+
When the consumer already has a prior context pack, send only what changed:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from gcf import encode_delta, DeltaPayload, Symbol, Edge
|
|
101
|
+
|
|
102
|
+
delta = DeltaPayload(
|
|
103
|
+
tool="context_for_task",
|
|
104
|
+
base_root="aaa111",
|
|
105
|
+
new_root="bbb222",
|
|
106
|
+
removed=[Symbol(qualified_name="pkg.OldFunc", kind="function")],
|
|
107
|
+
added=[Symbol(qualified_name="pkg.NewFunc", kind="function", score=0.85, provenance="rwr")],
|
|
108
|
+
delta_tokens=30,
|
|
109
|
+
full_tokens=200,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
output = encode_delta(delta)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
81.2% savings on re-queries where the pack changed slightly.
|
|
116
|
+
|
|
117
|
+
## API
|
|
118
|
+
|
|
119
|
+
| Function | Description |
|
|
120
|
+
|----------|-------------|
|
|
121
|
+
| `encode(p: Payload) -> str` | Encode a payload to GCF text |
|
|
122
|
+
| `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
|
|
123
|
+
| `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
|
|
124
|
+
| `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
|
|
125
|
+
| `Session()` | Create a new session tracker (thread-safe) |
|
|
126
|
+
|
|
127
|
+
## Types
|
|
128
|
+
|
|
129
|
+
| Type | Purpose |
|
|
130
|
+
|------|---------|
|
|
131
|
+
| `Payload` | Full GCF payload: tool, budget, symbols, edges, pack root |
|
|
132
|
+
| `Symbol` | Graph node: qualified name, kind, score, provenance, distance |
|
|
133
|
+
| `Edge` | Directed relationship: source, target, edge type |
|
|
134
|
+
| `DeltaPayload` | Diff between two packs: added/removed symbols and edges |
|
|
135
|
+
| `Session` | Thread-safe tracker for multi-call deduplication |
|
|
136
|
+
| `KIND_ABBREV` / `KIND_EXPAND` | Bidirectional kind abbreviation dicts |
|
|
137
|
+
|
|
138
|
+
## Comprehension Eval
|
|
139
|
+
|
|
140
|
+
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
|
|
141
|
+
|
|
142
|
+
| Format | Accuracy | Tokens | vs JSON |
|
|
143
|
+
|--------|----------|--------|---------|
|
|
144
|
+
| **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
|
|
145
|
+
| TOON | 100% (6/6) | 16,378 | 69% fewer |
|
|
146
|
+
| JSON | 66.7% (4/6) | 53,341 | baseline |
|
|
147
|
+
|
|
148
|
+
JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy. GCF does it in 32% fewer tokens.
|
|
149
|
+
|
|
150
|
+
## Token Efficiency (TOON's Own Benchmark)
|
|
151
|
+
|
|
152
|
+
Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tree/gcf-comparison) with GCF inserted (their datasets, their tokenizer):
|
|
153
|
+
|
|
154
|
+
| Track | GCF | TOON | Result |
|
|
155
|
+
|-------|-----|------|--------|
|
|
156
|
+
| Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 26% smaller** |
|
|
157
|
+
| Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
|
|
158
|
+
| Semi-uniform event logs | 107,269 | 154,032 | **GCF 30% smaller** |
|
|
159
|
+
|
|
160
|
+
GCF wins on every dataset except deeply nested config (75 tokens on a 618-token payload). On semi-uniform data, GCF uses 30% fewer tokens than TOON.
|
|
161
|
+
|
|
162
|
+
Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
|
|
163
|
+
|
|
164
|
+
## Other Implementations
|
|
165
|
+
|
|
166
|
+
- **Go**: [github.com/blackwell-systems/gcf-go](https://github.com/blackwell-systems/gcf-go)
|
|
167
|
+
- **TypeScript**: [github.com/blackwell-systems/gcf-typescript](https://github.com/blackwell-systems/gcf-typescript)
|
|
168
|
+
- **Specification**: [github.com/blackwell-systems/gcf](https://github.com/blackwell-systems/gcf)
|
|
169
|
+
|
|
170
|
+
## License
|
|
171
|
+
|
|
172
|
+
MIT
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "gcf-python"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Blackwell Systems" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["gcf", "llm", "mcp", "token-efficient", "graph", "wire-format"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Software Development :: Libraries",
|
|
27
|
+
"Typing :: Typed",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.scripts]
|
|
31
|
+
gcf = "gcf.cli:main"
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/blackwell-systems/gcf-python"
|
|
35
|
+
Documentation = "https://blackwell-systems.github.io/gcf/"
|
|
36
|
+
Specification = "https://github.com/blackwell-systems/gcf"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/gcf"]
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""GCF (Graph Compact Format): token-optimized wire format for LLM tool responses.
|
|
2
|
+
|
|
3
|
+
84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy.
|
|
4
|
+
|
|
5
|
+
Encode a payload:
|
|
6
|
+
|
|
7
|
+
from gcf import encode, Payload, Symbol
|
|
8
|
+
|
|
9
|
+
p = Payload(
|
|
10
|
+
tool="context_for_task",
|
|
11
|
+
token_budget=5000,
|
|
12
|
+
tokens_used=1847,
|
|
13
|
+
symbols=[Symbol(qualified_name="pkg.Func", kind="function", score=0.9, provenance="lsp_resolved")],
|
|
14
|
+
)
|
|
15
|
+
output = encode(p)
|
|
16
|
+
|
|
17
|
+
Decode a payload:
|
|
18
|
+
|
|
19
|
+
from gcf import decode
|
|
20
|
+
p = decode(input_text)
|
|
21
|
+
|
|
22
|
+
Session deduplication:
|
|
23
|
+
|
|
24
|
+
from gcf import encode_with_session, Session
|
|
25
|
+
sess = Session()
|
|
26
|
+
out1 = encode_with_session(payload1, sess) # full declarations
|
|
27
|
+
out2 = encode_with_session(payload2, sess) # reused symbols as bare refs
|
|
28
|
+
|
|
29
|
+
Delta encoding:
|
|
30
|
+
|
|
31
|
+
from gcf import encode_delta, DeltaPayload
|
|
32
|
+
out = encode_delta(DeltaPayload(...))
|
|
33
|
+
|
|
34
|
+
Specification: https://github.com/blackwell-systems/gcf
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from .constants import KIND_ABBREV, KIND_EXPAND
|
|
38
|
+
from .decode import DecodeError, decode
|
|
39
|
+
from .delta import encode_delta
|
|
40
|
+
from .encode import encode
|
|
41
|
+
from .session import Session, encode_with_session
|
|
42
|
+
from .types import Components, DeltaPayload, Edge, Payload, Symbol
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
"Components",
|
|
46
|
+
"DecodeError",
|
|
47
|
+
"DeltaPayload",
|
|
48
|
+
"Edge",
|
|
49
|
+
"KIND_ABBREV",
|
|
50
|
+
"KIND_EXPAND",
|
|
51
|
+
"Payload",
|
|
52
|
+
"Session",
|
|
53
|
+
"Symbol",
|
|
54
|
+
"decode",
|
|
55
|
+
"encode",
|
|
56
|
+
"encode_delta",
|
|
57
|
+
"encode_with_session",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
__version__ = "0.1.0"
|