gcf-python 0.2.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gcf_python-0.2.0 → gcf_python-0.3.1}/CHANGELOG.md +4 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/PKG-INFO +14 -12
- {gcf_python-0.2.0 → gcf_python-0.3.1}/README.md +13 -11
- {gcf_python-0.2.0 → gcf_python-0.3.1}/pyproject.toml +1 -1
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/generic.py +4 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/.github/workflows/ci.yml +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/.github/workflows/publish.yml +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/.gitignore +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/LICENSE +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/__init__.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/cli.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/constants.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/decode.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/delta.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/encode.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/session.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/src/gcf/types.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/__init__.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/test_decode.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/test_delta.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/test_encode.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/test_generic.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/test_roundtrip.py +0 -0
- {gcf_python-0.2.0 → gcf_python-0.3.1}/tests/test_session.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gcf-python
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
|
|
5
5
|
Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
|
|
6
6
|
Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
|
|
@@ -32,7 +32,7 @@ Description-Content-Type: text/markdown
|
|
|
32
32
|
|
|
33
33
|
Python implementation of [GCF (Graph Compact Format)](https://gcformat.com/) — the most token-efficient wire format for LLMs. A drop-in alternative to JSON and TOON for any structured data.
|
|
34
34
|
|
|
35
|
-
**79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON
|
|
35
|
+
**79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON scores 76.9% and TOON scores 92.3%.**
|
|
36
36
|
|
|
37
37
|
Docs: [gcformat.com](https://gcformat.com/) · [Playground](https://gcformat.com/playground.html) · [GCF vs TOON](https://gcformat.com/guide/vs-toon.html)
|
|
38
38
|
|
|
@@ -189,15 +189,17 @@ Works on dicts, lists, and primitives. Lists of uniform dicts get tabular rows.
|
|
|
189
189
|
|
|
190
190
|
## Comprehension Eval
|
|
191
191
|
|
|
192
|
-
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges.
|
|
192
|
+
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. 13 structured extraction questions sent to an LLM with zero format instructions:
|
|
193
193
|
|
|
194
194
|
| Format | Accuracy | Tokens | vs JSON |
|
|
195
195
|
|--------|----------|--------|---------|
|
|
196
|
-
| **GCF** | **100%** (
|
|
197
|
-
| TOON |
|
|
198
|
-
| JSON |
|
|
196
|
+
| **GCF** | **100%** (13/13) | **11,090** | **79% fewer** |
|
|
197
|
+
| TOON | 92.3% (12/13) | 16,378 | 69% fewer |
|
|
198
|
+
| JSON | 76.9% (10/13) | 53,341 | baseline |
|
|
199
199
|
|
|
200
|
-
|
|
200
|
+
GCF is the only format with perfect accuracy at scale, at 32% fewer tokens than TOON.
|
|
201
|
+
|
|
202
|
+
Reproduce: `git clone https://github.com/blackwell-systems/gcf-go && cd gcf-go/eval && GOWORK=off go test -run TestComprehension -v -timeout 0`
|
|
201
203
|
|
|
202
204
|
## Token Efficiency (TOON's Own Benchmark)
|
|
203
205
|
|
|
@@ -205,13 +207,13 @@ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tre
|
|
|
205
207
|
|
|
206
208
|
| Track | GCF | TOON | Result |
|
|
207
209
|
|-------|-----|------|--------|
|
|
208
|
-
| Mixed-structure (nested, semi-uniform) |
|
|
209
|
-
| Flat-only (tabular) | 66,
|
|
210
|
-
| Semi-uniform event logs |
|
|
210
|
+
| Mixed-structure (nested, semi-uniform) | 170,367 | 227,896 | **GCF 34% smaller** |
|
|
211
|
+
| Flat-only (tabular) | 66,029 | 67,837 | **GCF 3% smaller** |
|
|
212
|
+
| Semi-uniform event logs | 108,158 | 154,032 | **GCF 42% smaller** |
|
|
211
213
|
|
|
212
|
-
GCF wins
|
|
214
|
+
GCF wins all 6 datasets. On semi-uniform data (the most common real-world pattern), GCF uses 42% fewer tokens than TOON.
|
|
213
215
|
|
|
214
|
-
|
|
216
|
+
Reproduce: `git clone https://github.com/blackwell-systems/toon && cd toon && git checkout gcf-comparison && cd benchmarks && pnpm install && pnpm benchmark:tokens`
|
|
215
217
|
|
|
216
218
|
## Links
|
|
217
219
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
Python implementation of [GCF (Graph Compact Format)](https://gcformat.com/) — the most token-efficient wire format for LLMs. A drop-in alternative to JSON and TOON for any structured data.
|
|
9
9
|
|
|
10
|
-
**79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON
|
|
10
|
+
**79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON scores 76.9% and TOON scores 92.3%.**
|
|
11
11
|
|
|
12
12
|
Docs: [gcformat.com](https://gcformat.com/) · [Playground](https://gcformat.com/playground.html) · [GCF vs TOON](https://gcformat.com/guide/vs-toon.html)
|
|
13
13
|
|
|
@@ -164,15 +164,17 @@ Works on dicts, lists, and primitives. Lists of uniform dicts get tabular rows.
|
|
|
164
164
|
|
|
165
165
|
## Comprehension Eval
|
|
166
166
|
|
|
167
|
-
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges.
|
|
167
|
+
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. 13 structured extraction questions sent to an LLM with zero format instructions:
|
|
168
168
|
|
|
169
169
|
| Format | Accuracy | Tokens | vs JSON |
|
|
170
170
|
|--------|----------|--------|---------|
|
|
171
|
-
| **GCF** | **100%** (
|
|
172
|
-
| TOON |
|
|
173
|
-
| JSON |
|
|
171
|
+
| **GCF** | **100%** (13/13) | **11,090** | **79% fewer** |
|
|
172
|
+
| TOON | 92.3% (12/13) | 16,378 | 69% fewer |
|
|
173
|
+
| JSON | 76.9% (10/13) | 53,341 | baseline |
|
|
174
174
|
|
|
175
|
-
|
|
175
|
+
GCF is the only format with perfect accuracy at scale, at 32% fewer tokens than TOON.
|
|
176
|
+
|
|
177
|
+
Reproduce: `git clone https://github.com/blackwell-systems/gcf-go && cd gcf-go/eval && GOWORK=off go test -run TestComprehension -v -timeout 0`
|
|
176
178
|
|
|
177
179
|
## Token Efficiency (TOON's Own Benchmark)
|
|
178
180
|
|
|
@@ -180,13 +182,13 @@ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tre
|
|
|
180
182
|
|
|
181
183
|
| Track | GCF | TOON | Result |
|
|
182
184
|
|-------|-----|------|--------|
|
|
183
|
-
| Mixed-structure (nested, semi-uniform) |
|
|
184
|
-
| Flat-only (tabular) | 66,
|
|
185
|
-
| Semi-uniform event logs |
|
|
185
|
+
| Mixed-structure (nested, semi-uniform) | 170,367 | 227,896 | **GCF 34% smaller** |
|
|
186
|
+
| Flat-only (tabular) | 66,029 | 67,837 | **GCF 3% smaller** |
|
|
187
|
+
| Semi-uniform event logs | 108,158 | 154,032 | **GCF 42% smaller** |
|
|
186
188
|
|
|
187
|
-
GCF wins
|
|
189
|
+
GCF wins all 6 datasets. On semi-uniform data (the most common real-world pattern), GCF uses 42% fewer tokens than TOON.
|
|
188
190
|
|
|
189
|
-
|
|
191
|
+
Reproduce: `git clone https://github.com/blackwell-systems/toon && cd toon && git checkout gcf-comparison && cd benchmarks && pnpm install && pnpm benchmark:tokens`
|
|
190
192
|
|
|
191
193
|
## Links
|
|
192
194
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gcf-python"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
description = "Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -59,6 +59,10 @@ def _encode_array(items: list, name: str, lines: list[str], depth: int) -> None:
|
|
|
59
59
|
|
|
60
60
|
if _is_uniform_dict_list(items):
|
|
61
61
|
_encode_tabular(items, name, lines, depth)
|
|
62
|
+
elif all(not isinstance(item, (dict, list)) for item in items):
|
|
63
|
+
# Primitive array: inline as comma-separated values.
|
|
64
|
+
vals = ",".join(_format_value(item) for item in items)
|
|
65
|
+
lines.append(f"{prefix}{name}[{len(items)}]: {vals}")
|
|
62
66
|
else:
|
|
63
67
|
lines.append(f"{prefix}## {name} [{len(items)}]")
|
|
64
68
|
for i, item in enumerate(items):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|