gcf-python 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: ${{ matrix.python-version }}
20
+ - run: pip install -e ".[dev]" || pip install -e . && pip install pytest
21
+ - run: pytest tests/ -v
@@ -0,0 +1,24 @@
1
+ name: Publish
2
+
3
+ on:
4
+ push:
5
+ tags: ['v*']
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: read
12
+ id-token: write
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: '3.12'
18
+ - run: pip install build twine
19
+ - run: pip install -e . && pip install pytest && pytest tests/ -v
20
+ - run: python -m build
21
+ - run: twine upload --verbose dist/*
22
+ env:
23
+ TWINE_USERNAME: __token__
24
+ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .venv/
8
+ .pytest_cache/
9
+ *.egg
10
+ .eggs/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Blackwell Systems
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: gcf-python
3
+ Version: 0.1.0
4
+ Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
5
+ Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
6
+ Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
7
+ Project-URL: Specification, https://github.com/blackwell-systems/gcf
8
+ Author: Blackwell Systems
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: gcf,graph,llm,mcp,token-efficient,wire-format
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Libraries
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+
26
+ <p align="center">
27
+ <a href="https://github.com/blackwell-systems"><img src="https://raw.githubusercontent.com/blackwell-systems/blackwell-docs-theme/main/badge-trademark.svg" alt="Blackwell Systems"></a>
28
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
29
+ </p>
30
+
31
+ # gcf-python
32
+
33
+ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwell-systems/gcf).
34
+
35
+ **84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy at 500 symbols, where JSON fails.**
36
+
37
+ ## Install
38
+
39
+ ```
40
+ pip install gcf-python
41
+ ```
42
+
43
+ Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
44
+
45
+ ## CLI
46
+
47
+ ```bash
48
+ gcf encode < payload.json # JSON to GCF
49
+ gcf decode < payload.gcf # GCF to JSON
50
+ gcf stats < payload.json # token comparison with visual bar
51
+ ```
52
+
53
+ ```
54
+ Payload: 50 symbols, 20 edges
55
+
56
+ JSON ██████████████████████████████ 4,200 tokens
57
+ GCF ████████░░░░░░░░░░░░░░░░░░░░░░ 1,150 tokens
58
+
59
+ Savings: 73% fewer tokens with GCF
60
+ ```
61
+
62
+ ## Library
63
+
64
+ ### Quick Start
65
+
66
+ ```python
67
+ from gcf import encode, Payload, Symbol, Edge
68
+
69
+ p = Payload(
70
+ tool="context_for_task",
71
+ token_budget=5000,
72
+ tokens_used=1847,
73
+ symbols=[
74
+ Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
75
+ Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
76
+ ],
77
+ edges=[
78
+ Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
79
+ ],
80
+ )
81
+
82
+ output = encode(p)
83
+ ```
84
+
85
+ Output:
86
+ ```
87
+ GCF tool=context_for_task budget=5000 tokens=1847 symbols=2
88
+ ## targets
89
+ @0 fn pkg.AuthMiddleware 0.78 lsp_resolved
90
+ ## related
91
+ @1 fn pkg.NewServer 0.54 lsp_resolved
92
+ ## edges
93
+ @0<@1 calls
94
+ ```
95
+
96
+ ## Decode
97
+
98
+ ```python
99
+ from gcf import decode
100
+
101
+ p = decode(input_text)
102
+ print(p.tool, len(p.symbols), "symbols", len(p.edges), "edges")
103
+ ```
104
+
105
+ ## Session Deduplication
106
+
107
+ Track transmitted symbols across multiple tool responses. Previously-sent symbols become bare references instead of full declarations:
108
+
109
+ ```python
110
+ from gcf import encode_with_session, Session, Payload, Symbol
111
+
112
+ sess = Session()
113
+
114
+ out1 = encode_with_session(payload1, sess) # full declarations
115
+ out2 = encode_with_session(payload2, sess) # reused symbols as "@N # previously transmitted"
116
+ ```
117
+
118
+ By the 5th call in a session: 92.7% token savings vs JSON.
119
+
120
+ ## Delta Encoding
121
+
122
+ When the consumer already has a prior context pack, send only what changed:
123
+
124
+ ```python
125
+ from gcf import encode_delta, DeltaPayload, Symbol, Edge
126
+
127
+ delta = DeltaPayload(
128
+ tool="context_for_task",
129
+ base_root="aaa111",
130
+ new_root="bbb222",
131
+ removed=[Symbol(qualified_name="pkg.OldFunc", kind="function")],
132
+ added=[Symbol(qualified_name="pkg.NewFunc", kind="function", score=0.85, provenance="rwr")],
133
+ delta_tokens=30,
134
+ full_tokens=200,
135
+ )
136
+
137
+ output = encode_delta(delta)
138
+ ```
139
+
140
+ 81.2% savings on re-queries where the pack changed slightly.
141
+
142
+ ## API
143
+
144
+ | Function | Description |
145
+ |----------|-------------|
146
+ | `encode(p: Payload) -> str` | Encode a payload to GCF text |
147
+ | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
148
+ | `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
149
+ | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
150
+ | `Session()` | Create a new session tracker (thread-safe) |
151
+
152
+ ## Types
153
+
154
+ | Type | Purpose |
155
+ |------|---------|
156
+ | `Payload` | Full GCF payload: tool, budget, symbols, edges, pack root |
157
+ | `Symbol` | Graph node: qualified name, kind, score, provenance, distance |
158
+ | `Edge` | Directed relationship: source, target, edge type |
159
+ | `DeltaPayload` | Diff between two packs: added/removed symbols and edges |
160
+ | `Session` | Thread-safe tracker for multi-call deduplication |
161
+ | `KIND_ABBREV` / `KIND_EXPAND` | Bidirectional kind abbreviation dicts |
162
+
163
+ ## Comprehension Eval
164
+
165
+ Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
166
+
167
+ | Format | Accuracy | Tokens | vs JSON |
168
+ |--------|----------|--------|---------|
169
+ | **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
170
+ | TOON | 100% (6/6) | 16,378 | 69% fewer |
171
+ | JSON | 66.7% (4/6) | 53,341 | baseline |
172
+
173
+ JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy. GCF does it in 32% fewer tokens than TOON.
174
+
175
+ ## Token Efficiency (TOON's Own Benchmark)
176
+
177
+ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tree/gcf-comparison) with GCF inserted (their datasets, their tokenizer):
178
+
179
+ | Track | GCF | TOON | Result |
180
+ |-------|-----|------|--------|
181
+ | Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 34% smaller** |
182
+ | Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
183
+ | Semi-uniform event logs | 107,269 | 154,032 | **GCF 44% smaller** |
184
+
185
+ GCF wins on every dataset except deeply nested config (75 tokens on a 618-token payload). On semi-uniform data, GCF uses 44% fewer tokens than TOON.
186
+
187
+ Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
188
+
189
+ ## Other Implementations
190
+
191
+ - **Go**: [github.com/blackwell-systems/gcf-go](https://github.com/blackwell-systems/gcf-go)
192
+ - **TypeScript**: [github.com/blackwell-systems/gcf-typescript](https://github.com/blackwell-systems/gcf-typescript)
193
+ - **Specification**: [github.com/blackwell-systems/gcf](https://github.com/blackwell-systems/gcf)
194
+
195
+ ## License
196
+
197
+ MIT
@@ -0,0 +1,172 @@
1
+ <p align="center">
2
+ <a href="https://github.com/blackwell-systems"><img src="https://raw.githubusercontent.com/blackwell-systems/blackwell-docs-theme/main/badge-trademark.svg" alt="Blackwell Systems"></a>
3
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
4
+ </p>
5
+
6
+ # gcf-python
7
+
8
+ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwell-systems/gcf).
9
+
10
+ **84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy at 500 symbols, where JSON fails.**
11
+
12
+ ## Install
13
+
14
+ ```
15
+ pip install gcf-python
16
+ ```
17
+
18
+ Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
19
+
20
+ ## CLI
21
+
22
+ ```bash
23
+ gcf encode < payload.json # JSON to GCF
24
+ gcf decode < payload.gcf # GCF to JSON
25
+ gcf stats < payload.json # token comparison with visual bar
26
+ ```
27
+
28
+ ```
29
+ Payload: 50 symbols, 20 edges
30
+
31
+ JSON ██████████████████████████████ 4,200 tokens
32
+ GCF ████████░░░░░░░░░░░░░░░░░░░░░░ 1,150 tokens
33
+
34
+ Savings: 73% fewer tokens with GCF
35
+ ```
36
+
37
+ ## Library
38
+
39
+ ### Quick Start
40
+
41
+ ```python
42
+ from gcf import encode, Payload, Symbol, Edge
43
+
44
+ p = Payload(
45
+ tool="context_for_task",
46
+ token_budget=5000,
47
+ tokens_used=1847,
48
+ symbols=[
49
+ Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
50
+ Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
51
+ ],
52
+ edges=[
53
+ Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
54
+ ],
55
+ )
56
+
57
+ output = encode(p)
58
+ ```
59
+
60
+ Output:
61
+ ```
62
+ GCF tool=context_for_task budget=5000 tokens=1847 symbols=2
63
+ ## targets
64
+ @0 fn pkg.AuthMiddleware 0.78 lsp_resolved
65
+ ## related
66
+ @1 fn pkg.NewServer 0.54 lsp_resolved
67
+ ## edges
68
+ @0<@1 calls
69
+ ```
70
+
71
+ ## Decode
72
+
73
+ ```python
74
+ from gcf import decode
75
+
76
+ p = decode(input_text)
77
+ print(p.tool, len(p.symbols), "symbols", len(p.edges), "edges")
78
+ ```
79
+
80
+ ## Session Deduplication
81
+
82
+ Track transmitted symbols across multiple tool responses. Previously-sent symbols become bare references instead of full declarations:
83
+
84
+ ```python
85
+ from gcf import encode_with_session, Session, Payload, Symbol
86
+
87
+ sess = Session()
88
+
89
+ out1 = encode_with_session(payload1, sess) # full declarations
90
+ out2 = encode_with_session(payload2, sess) # reused symbols as "@N # previously transmitted"
91
+ ```
92
+
93
+ By the 5th call in a session: 92.7% token savings vs JSON.
94
+
95
+ ## Delta Encoding
96
+
97
+ When the consumer already has a prior context pack, send only what changed:
98
+
99
+ ```python
100
+ from gcf import encode_delta, DeltaPayload, Symbol, Edge
101
+
102
+ delta = DeltaPayload(
103
+ tool="context_for_task",
104
+ base_root="aaa111",
105
+ new_root="bbb222",
106
+ removed=[Symbol(qualified_name="pkg.OldFunc", kind="function")],
107
+ added=[Symbol(qualified_name="pkg.NewFunc", kind="function", score=0.85, provenance="rwr")],
108
+ delta_tokens=30,
109
+ full_tokens=200,
110
+ )
111
+
112
+ output = encode_delta(delta)
113
+ ```
114
+
115
+ 81.2% savings on re-queries where the pack changed slightly.
116
+
117
+ ## API
118
+
119
+ | Function | Description |
120
+ |----------|-------------|
121
+ | `encode(p: Payload) -> str` | Encode a payload to GCF text |
122
+ | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
123
+ | `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
124
+ | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
125
+ | `Session()` | Create a new session tracker (thread-safe) |
126
+
127
+ ## Types
128
+
129
+ | Type | Purpose |
130
+ |------|---------|
131
+ | `Payload` | Full GCF payload: tool, budget, symbols, edges, pack root |
132
+ | `Symbol` | Graph node: qualified name, kind, score, provenance, distance |
133
+ | `Edge` | Directed relationship: source, target, edge type |
134
+ | `DeltaPayload` | Diff between two packs: added/removed symbols and edges |
135
+ | `Session` | Thread-safe tracker for multi-call deduplication |
136
+ | `KIND_ABBREV` / `KIND_EXPAND` | Bidirectional kind abbreviation dicts |
137
+
138
+ ## Comprehension Eval
139
+
140
+ Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
141
+
142
+ | Format | Accuracy | Tokens | vs JSON |
143
+ |--------|----------|--------|---------|
144
+ | **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
145
+ | TOON | 100% (6/6) | 16,378 | 69% fewer |
146
+ | JSON | 66.7% (4/6) | 53,341 | baseline |
147
+
148
+ JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy. GCF does it in 32% fewer tokens than TOON.
149
+
150
+ ## Token Efficiency (TOON's Own Benchmark)
151
+
152
+ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tree/gcf-comparison) with GCF inserted (their datasets, their tokenizer):
153
+
154
+ | Track | GCF | TOON | Result |
155
+ |-------|-----|------|--------|
156
+ | Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 34% smaller** |
157
+ | Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
158
+ | Semi-uniform event logs | 107,269 | 154,032 | **GCF 44% smaller** |
159
+
160
+ GCF wins on every dataset except deeply nested config (75 tokens on a 618-token payload). On semi-uniform data, GCF uses 44% fewer tokens than TOON.
161
+
162
+ Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
163
+
164
+ ## Other Implementations
165
+
166
+ - **Go**: [github.com/blackwell-systems/gcf-go](https://github.com/blackwell-systems/gcf-go)
167
+ - **TypeScript**: [github.com/blackwell-systems/gcf-typescript](https://github.com/blackwell-systems/gcf-typescript)
168
+ - **Specification**: [github.com/blackwell-systems/gcf](https://github.com/blackwell-systems/gcf)
169
+
170
+ ## License
171
+
172
+ MIT
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "gcf-python"
7
+ version = "0.1.0"
8
+ description = "Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ { name = "Blackwell Systems" },
14
+ ]
15
+ keywords = ["gcf", "llm", "mcp", "token-efficient", "graph", "wire-format"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Software Development :: Libraries",
27
+ "Typing :: Typed",
28
+ ]
29
+
30
+ [project.scripts]
31
+ gcf = "gcf.cli:main"
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/blackwell-systems/gcf-python"
35
+ Documentation = "https://blackwell-systems.github.io/gcf/"
36
+ Specification = "https://github.com/blackwell-systems/gcf"
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/gcf"]
40
+
41
+ [tool.pytest.ini_options]
42
+ testpaths = ["tests"]
@@ -0,0 +1,60 @@
1
+ """GCF (Graph Compact Format): token-optimized wire format for LLM tool responses.
2
+
3
+ 84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy.
4
+
5
+ Encode a payload:
6
+
7
+ from gcf import encode, Payload, Symbol
8
+
9
+ p = Payload(
10
+ tool="context_for_task",
11
+ token_budget=5000,
12
+ tokens_used=1847,
13
+ symbols=[Symbol(qualified_name="pkg.Func", kind="function", score=0.9, provenance="lsp_resolved")],
14
+ )
15
+ output = encode(p)
16
+
17
+ Decode a payload:
18
+
19
+ from gcf import decode
20
+ p = decode(input_text)
21
+
22
+ Session deduplication:
23
+
24
+ from gcf import encode_with_session, Session
25
+ sess = Session()
26
+ out1 = encode_with_session(payload1, sess) # full declarations
27
+ out2 = encode_with_session(payload2, sess) # reused symbols as bare refs
28
+
29
+ Delta encoding:
30
+
31
+ from gcf import encode_delta, DeltaPayload
32
+ out = encode_delta(DeltaPayload(...))
33
+
34
+ Specification: https://github.com/blackwell-systems/gcf
35
+ """
36
+
37
+ from .constants import KIND_ABBREV, KIND_EXPAND
38
+ from .decode import DecodeError, decode
39
+ from .delta import encode_delta
40
+ from .encode import encode
41
+ from .session import Session, encode_with_session
42
+ from .types import Components, DeltaPayload, Edge, Payload, Symbol
43
+
44
+ __all__ = [
45
+ "Components",
46
+ "DecodeError",
47
+ "DeltaPayload",
48
+ "Edge",
49
+ "KIND_ABBREV",
50
+ "KIND_EXPAND",
51
+ "Payload",
52
+ "Session",
53
+ "Symbol",
54
+ "decode",
55
+ "encode",
56
+ "encode_delta",
57
+ "encode_with_session",
58
+ ]
59
+
60
+ __version__ = "0.1.0"