gcf-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gcf/__init__.py +60 -0
- gcf/cli.py +155 -0
- gcf/constants.py +24 -0
- gcf/decode.py +181 -0
- gcf/delta.py +54 -0
- gcf/encode.py +86 -0
- gcf/session.py +137 -0
- gcf/types.py +63 -0
- gcf_python-0.1.0.dist-info/METADATA +197 -0
- gcf_python-0.1.0.dist-info/RECORD +13 -0
- gcf_python-0.1.0.dist-info/WHEEL +4 -0
- gcf_python-0.1.0.dist-info/entry_points.txt +2 -0
- gcf_python-0.1.0.dist-info/licenses/LICENSE +21 -0
gcf/__init__.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""GCF (Graph Compact Format): token-optimized wire format for LLM tool responses.
|
|
2
|
+
|
|
3
|
+
84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy.
|
|
4
|
+
|
|
5
|
+
Encode a payload:
|
|
6
|
+
|
|
7
|
+
from gcf import encode, Payload, Symbol
|
|
8
|
+
|
|
9
|
+
p = Payload(
|
|
10
|
+
tool="context_for_task",
|
|
11
|
+
token_budget=5000,
|
|
12
|
+
tokens_used=1847,
|
|
13
|
+
symbols=[Symbol(qualified_name="pkg.Func", kind="function", score=0.9, provenance="lsp_resolved")],
|
|
14
|
+
)
|
|
15
|
+
output = encode(p)
|
|
16
|
+
|
|
17
|
+
Decode a payload:
|
|
18
|
+
|
|
19
|
+
from gcf import decode
|
|
20
|
+
p = decode(input_text)
|
|
21
|
+
|
|
22
|
+
Session deduplication:
|
|
23
|
+
|
|
24
|
+
from gcf import encode_with_session, Session
|
|
25
|
+
sess = Session()
|
|
26
|
+
out1 = encode_with_session(payload1, sess) # full declarations
|
|
27
|
+
out2 = encode_with_session(payload2, sess) # reused symbols as bare refs
|
|
28
|
+
|
|
29
|
+
Delta encoding:
|
|
30
|
+
|
|
31
|
+
from gcf import encode_delta, DeltaPayload
|
|
32
|
+
out = encode_delta(DeltaPayload(...))
|
|
33
|
+
|
|
34
|
+
Specification: https://github.com/blackwell-systems/gcf
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from .constants import KIND_ABBREV, KIND_EXPAND
|
|
38
|
+
from .decode import DecodeError, decode
|
|
39
|
+
from .delta import encode_delta
|
|
40
|
+
from .encode import encode
|
|
41
|
+
from .session import Session, encode_with_session
|
|
42
|
+
from .types import Components, DeltaPayload, Edge, Payload, Symbol
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
"Components",
|
|
46
|
+
"DecodeError",
|
|
47
|
+
"DeltaPayload",
|
|
48
|
+
"Edge",
|
|
49
|
+
"KIND_ABBREV",
|
|
50
|
+
"KIND_EXPAND",
|
|
51
|
+
"Payload",
|
|
52
|
+
"Session",
|
|
53
|
+
"Symbol",
|
|
54
|
+
"decode",
|
|
55
|
+
"encode",
|
|
56
|
+
"encode_delta",
|
|
57
|
+
"encode_with_session",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
__version__ = "0.1.0"
|
gcf/cli.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""GCF command-line interface: encode, decode, stats."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from .decode import decode
|
|
7
|
+
from .encode import encode
|
|
8
|
+
from .types import Edge, Payload, Symbol
|
|
9
|
+
|
|
10
|
+
USAGE = """gcf - token-optimized wire format for LLM tool responses
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
gcf encode [file] Encode JSON payload to GCF (stdin if no file)
|
|
14
|
+
gcf decode [file] Decode GCF text to JSON (stdin if no file)
|
|
15
|
+
gcf stats [file] Compare token counts: JSON vs GCF (stdin if no file)
|
|
16
|
+
gcf version Print version
|
|
17
|
+
|
|
18
|
+
Examples:
|
|
19
|
+
gcf encode < payload.json
|
|
20
|
+
gcf decode < payload.gcf
|
|
21
|
+
gcf stats payload.json
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def main() -> None:
    """Run the ``gcf`` command-line tool.

    The sub-command is taken from ``sys.argv[1]`` and the optional input path
    from ``sys.argv[2]``. ``encode``, ``decode`` and ``stats`` read their input
    through ``_read_input`` and pass it to the matching ``_do_*`` helper;
    ``version`` prints the version string.

    Exits with status 0 after printing help on request, and with status 1
    when called without arguments, with an unknown command, or when the input
    cannot be read.
    """
    args = sys.argv[1:]
    if not args or args[0] in ("-h", "--help", "help"):
        print(USAGE, end="")
        # Asking for help succeeds; running with no arguments is a usage error.
        sys.exit(0 if args else 1)

    cmd = args[0]
    file_args = args[1:]

    if cmd == "version":
        print("gcf 0.1.0")
        return

    handlers = {
        "encode": _do_encode,
        "decode": _do_decode,
        "stats": _do_stats,
    }
    handler = handlers.get(cmd)
    if handler is None:
        print(f"unknown command: {cmd}\n", file=sys.stderr)
        print(USAGE, file=sys.stderr, end="")
        sys.exit(1)

    try:
        data = _read_input(file_args)
    except (OSError, UnicodeDecodeError) as e:
        # A missing or unreadable file is a user error, not a crash: report
        # it on stderr instead of letting the traceback escape.
        print(f"error: cannot read input: {e}", file=sys.stderr)
        sys.exit(1)

    handler(data)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _read_input(args: list[str]) -> str:
    """Return the full text of the command's input.

    Args:
        args: Positional arguments that follow the sub-command. When the
            first one is present and is not ``"-"`` it is opened as a file
            path; otherwise standard input is read.

    Returns:
        The complete input as a single string.

    Raises:
        OSError: If the named file cannot be opened or read.
        UnicodeDecodeError: If the named file is not valid UTF-8.
    """
    if args and args[0] != "-":
        # Pin the encoding: without it open() falls back to the platform
        # locale and non-ASCII payloads decode differently per machine.
        with open(args[0], encoding="utf-8") as f:
            return f.read()
    return sys.stdin.read()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _payload_from_json(data: str) -> Payload:
    """Build a Payload from its JSON (camelCase) representation.

    Args:
        data: JSON text holding one object with the optional keys ``tool``,
            ``tokenBudget``, ``tokensUsed``, ``packRoot``, ``symbols`` and
            ``edges``.

    Returns:
        The Payload described by ``data``. Missing top-level keys fall back
        to empty strings, zero, or empty lists.

    Raises:
        json.JSONDecodeError: If ``data`` is not valid JSON.
        KeyError: If a symbol lacks ``qualifiedName``, ``kind``, ``score`` or
            ``provenance``, or an edge lacks ``source``, ``target`` or
            ``edgeType``.
        TypeError: If the top-level value is not a JSON object, or a symbol
            or edge entry cannot be indexed by key.
    """
    obj = json.loads(data)
    if not isinstance(obj, dict):
        # Without this guard obj.get() raises AttributeError, which the CLI
        # callers do not catch; TypeError is one they already handle.
        raise TypeError(
            f"expected a JSON object at top level, got {type(obj).__name__}"
        )

    symbols = [
        Symbol(
            qualified_name=s["qualifiedName"],
            kind=s["kind"],
            score=s["score"],
            provenance=s["provenance"],
            distance=s.get("distance", 0),
        )
        for s in obj.get("symbols", [])
    ]
    edges = [
        Edge(
            source=e["source"],
            target=e["target"],
            edge_type=e["edgeType"],
            status=e.get("status", ""),
        )
        for e in obj.get("edges", [])
    ]
    return Payload(
        tool=obj.get("tool", ""),
        token_budget=obj.get("tokenBudget", 0),
        tokens_used=obj.get("tokensUsed", 0),
        pack_root=obj.get("packRoot", ""),
        symbols=symbols,
        edges=edges,
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _payload_to_json(p: Payload) -> str:
    """Render a Payload as pretty-printed JSON with camelCase keys.

    Args:
        p: The payload to serialize.

    Returns:
        JSON text indented by two spaces. An edge carries a ``status`` key
        only when its status is non-empty.
    """
    symbol_entries = []
    for sym in p.symbols:
        symbol_entries.append(
            {
                "qualifiedName": sym.qualified_name,
                "kind": sym.kind,
                "score": sym.score,
                "provenance": sym.provenance,
                "distance": sym.distance,
            }
        )

    edge_entries = []
    for edge in p.edges:
        entry = {
            "source": edge.source,
            "target": edge.target,
            "edgeType": edge.edge_type,
        }
        if edge.status:
            entry["status"] = edge.status
        edge_entries.append(entry)

    document = {
        "tool": p.tool,
        "tokensUsed": p.tokens_used,
        "tokenBudget": p.token_budget,
        "packRoot": p.pack_root,
        "symbols": symbol_entries,
        "edges": edge_entries,
    }
    return json.dumps(document, indent=2)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _do_encode(data: str) -> None:
    """Convert a JSON payload to GCF and write it to standard output.

    Args:
        data: JSON text describing the payload.

    On malformed input an ``error:`` line is printed to standard error and
    the process exits with status 1.
    """
    try:
        p = _payload_from_json(data)
        # Encoding stays inside the try: wrongly typed fields (for example a
        # string score) only fail once they are formatted.
        encoded = encode(p)
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # json.JSONDecodeError is a ValueError, so it is covered here too.
        print(f"error: invalid JSON: {e}", file=sys.stderr)
        sys.exit(1)
    print(encoded, end="")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _do_decode(data: str) -> None:
    """Convert GCF text to JSON and write it to standard output.

    Args:
        data: GCF text to decode.

    On malformed input an ``error:`` line is printed to standard error and
    the process exits with status 1, matching the ``encode`` and ``stats``
    commands.
    """
    # Imported here because the module header only brings in `decode`.
    from .decode import DecodeError

    try:
        p = decode(data)
    except DecodeError as e:
        print(f"error: invalid GCF: {e}", file=sys.stderr)
        sys.exit(1)
    print(_payload_to_json(p))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _do_stats(data: str) -> None:
    """Print a JSON-versus-GCF size comparison for a JSON payload.

    Token counts are estimated as one token per four characters of the
    stripped text, for both the JSON input and its GCF encoding.

    Args:
        data: JSON text describing the payload.

    On malformed input an ``error:`` line is printed to standard error and
    the process exits with status 1.
    """
    try:
        p = _payload_from_json(data)
        # Kept inside the try so wrongly typed fields surface as a clean
        # error instead of a traceback from the encoder.
        gcf_output = encode(p)
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # json.JSONDecodeError is a ValueError, so it is covered here too.
        print(f"error: invalid JSON: {e}", file=sys.stderr)
        sys.exit(1)

    json_tokens = len(data.strip()) // 4
    gcf_tokens = len(gcf_output.strip()) // 4

    savings = 0.0
    if json_tokens > 0:
        savings = 100.0 * (1.0 - gcf_tokens / json_tokens)

    bar_width = 30
    json_bar = "█" * bar_width
    gcf_filled = (gcf_tokens * bar_width) // json_tokens if json_tokens > 0 else 0
    # Clamp: when the GCF form is larger than the JSON (tiny payloads) the
    # bar would otherwise grow past its fixed width.
    gcf_filled = min(gcf_filled, bar_width)
    gcf_bar = "█" * gcf_filled + "░" * (bar_width - gcf_filled)

    print(f"Payload: {len(p.symbols)} symbols, {len(p.edges)} edges\n")
    print(f" JSON {json_bar} {json_tokens} tokens")
    print(f" GCF {gcf_bar} {gcf_tokens} tokens")
    print(f"\n Savings: {savings:.0f}% fewer tokens with GCF")
|
gcf/constants.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Kind abbreviation mappings for GCF encoding/decoding."""
|
|
2
|
+
|
|
3
|
+
# Maps full kind names to short GCF abbreviations.
|
|
4
|
+
KIND_ABBREV: dict[str, str] = {
    "function": "fn",
    "type": "type",
    "method": "method",
    "interface": "iface",
    "var": "var",
    "const": "const",
    "resource": "resource",
    "table": "table",
    "class": "class",
    "selector": "selector",
    "field": "field",
    "route_handler": "route",
    "external": "ext",
    "file": "file",
    "package": "pkg",
    "service": "svc",
}

# Inverse of KIND_ABBREV: short GCF abbreviation -> full kind name.
# The abbreviations above are all distinct, so no entry is lost.
KIND_EXPAND: dict[str, str] = dict(zip(KIND_ABBREV.values(), KIND_ABBREV.keys()))
|
gcf/decode.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""GCF decoder: parses GCF text back into a Payload."""
|
|
2
|
+
|
|
3
|
+
from .constants import KIND_EXPAND
|
|
4
|
+
from .types import Edge, Payload, Symbol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DecodeError(Exception):
    """Error raised by the decoder when its input is not valid GCF text."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def decode(input_text: str) -> Payload:
    """Parse GCF text back into a Payload.

    The first line must be a ``GCF ...`` header. Every following non-empty
    line is either a ``## <group>`` heading, a ``# `` comment, a symbol line
    or, once the ``## edges`` heading has been seen, an edge line.

    Args:
        input_text: GCF-formatted text string.

    Returns:
        Parsed Payload. Each symbol's distance comes from the group heading
        it appears under: ``targets`` is 0, ``related`` is 1, ``extended`` is
        2 and ``distance_N`` is N.

    Raises:
        DecodeError: If the input is empty or is not valid GCF.
    """
    # str.split always returns at least one element, so emptiness has to be
    # tested on the text itself rather than on the list of lines.
    if not input_text.strip():
        raise DecodeError("empty input")

    lines = input_text.split("\n")
    p = Payload()

    # Parse header.
    header = lines[0]
    if not header.startswith("GCF "):
        raise DecodeError(f"invalid header, expected 'GCF ...' got {header!r}")
    _parse_header(header[4:], p)

    # Parse body: symbols and edges.
    named_distances = {"targets": 0, "related": 1, "extended": 2}
    symbols: list[Symbol] = []
    sym_by_id: dict[int, Symbol] = {}
    current_distance = 0
    in_edges = False

    for raw_line in lines[1:]:
        line = raw_line.rstrip("\r")  # tolerate CRLF line endings
        if not line:
            continue

        # Group header.
        if line.startswith("## "):
            group = line[3:]
            in_edges = group == "edges"
            if group in named_distances:
                current_distance = named_distances[group]
            elif group.startswith("distance_"):
                try:
                    current_distance = int(group[9:])
                except ValueError:
                    # Best effort: a malformed suffix leaves the previous
                    # distance in place instead of rejecting the document.
                    pass
            continue

        # Comment.
        if line.startswith("# "):
            continue

        if in_edges:
            p.edges.append(_parse_edge_line(line, sym_by_id))
        else:
            sym, sym_id = _parse_symbol_line(line, current_distance)
            symbols.append(sym)
            sym_by_id[sym_id] = sym

    p.symbols = symbols
    return p
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _parse_header(fields: str, p: Payload) -> None:
    """Copy the ``key=value`` pairs of a GCF header line onto a payload.

    Args:
        fields: Header text after the leading ``GCF `` marker.
        p: Payload updated in place.

    Raises:
        DecodeError: If ``budget`` or ``tokens`` is not an integer.
    """
    for token in fields.split():
        key, sep, value = token.partition("=")
        if not sep:
            # Not a key=value pair; ignore it.
            continue

        if key == "tool":
            p.tool = value
        elif key == "pack_root":
            p.pack_root = value
        elif key == "budget":
            try:
                p.token_budget = int(value)
            except ValueError as e:
                raise DecodeError(f"invalid budget {value!r}: {e}") from e
        elif key == "tokens":
            try:
                p.tokens_used = int(value)
            except ValueError as e:
                raise DecodeError(f"invalid tokens {value!r}: {e}") from e
        # Any other key, including the informational "symbols" count, is
        # skipped: the symbol list is rebuilt from the parsed symbol lines.
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _parse_symbol_line(line: str, distance: int) -> tuple[Symbol, int]:
    """Turn one ``@id kind name score provenance`` line into a Symbol.

    Args:
        line: The symbol line, without its line terminator.
        distance: Distance of the group the line appears under.

    Returns:
        The parsed Symbol together with the local ID that followed ``@``.
        Abbreviated kinds are expanded through ``KIND_EXPAND``; unknown
        kinds are kept as written.

    Raises:
        DecodeError: If the line does not start with ``@``, has fewer than
            five whitespace-separated fields, or has a non-numeric ID or
            score.
    """
    if not line.startswith("@"):
        raise DecodeError(f"expected symbol line starting with @, got {line!r}")

    parts = line.split()
    if len(parts) < 5:
        raise DecodeError(
            f"symbol line needs at least 5 fields, got {len(parts)} in {line!r}"
        )

    id_str = parts[0][1:]  # strip @
    try:
        sym_id = int(id_str)
    except ValueError as e:
        raise DecodeError(f"invalid symbol id {id_str!r}: {e}") from e

    try:
        score = float(parts[3])
    except ValueError as e:
        raise DecodeError(f"invalid score {parts[3]!r}: {e}") from e

    abbrev = parts[1]
    symbol = Symbol(
        qualified_name=parts[2],
        kind=KIND_EXPAND.get(abbrev, abbrev),
        score=score,
        provenance=parts[4],
        distance=distance,
    )
    return symbol, sym_id
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _parse_edge_line(line: str, sym_by_id: dict[int, Symbol]) -> Edge:
    """Parse an ``@T<@S edge_type [status]`` line into an Edge.

    Args:
        line: The edge line, without its line terminator.
        sym_by_id: Symbols parsed so far, keyed by their local ID.

    Returns:
        An Edge whose source and target are the qualified names of the
        referenced symbols. ``status`` is the third field when present,
        otherwise an empty string.

    Raises:
        DecodeError: If the line has fewer than two fields, the reference is
            not of the form ``@T<@S``, an ID is not an integer, or an ID
            does not match a known symbol.
    """
    parts = line.split()
    if len(parts) < 2:
        raise DecodeError(f"edge line needs at least 2 fields, got {line!r}")

    ref = parts[0]
    target_part, sep, source_part = ref.partition("<")
    if not sep:
        raise DecodeError(f"edge line missing '<' separator in {ref!r}")

    # Check the sigils before stripping them: dropping the first character
    # blindly would turn a malformed "10<@1" into a reference to symbol 0.
    if not target_part.startswith("@") or not source_part.startswith("@"):
        raise DecodeError(f"edge reference must look like '@T<@S', got {ref!r}")

    target_id_str = target_part[1:]
    source_id_str = source_part[1:]

    try:
        target_id = int(target_id_str)
    except ValueError as e:
        raise DecodeError(f"invalid target id {target_id_str!r}: {e}") from e

    try:
        source_id = int(source_id_str)
    except ValueError as e:
        raise DecodeError(f"invalid source id {source_id_str!r}: {e}") from e

    target_sym = sym_by_id.get(target_id)
    source_sym = sym_by_id.get(source_id)
    if target_sym is None or source_sym is None:
        raise DecodeError(
            f"edge references unknown symbol id(s): target={target_id} source={source_id}"
        )

    edge_type = parts[1]
    status = parts[2] if len(parts) >= 3 else ""

    return Edge(
        source=source_sym.qualified_name,
        target=target_sym.qualified_name,
        edge_type=edge_type,
        status=status,
    )
|
gcf/delta.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""GCF delta encoding: only added/removed symbols for incremental delivery."""
|
|
2
|
+
|
|
3
|
+
from .constants import KIND_ABBREV
|
|
4
|
+
from .types import DeltaPayload
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def encode_delta(d: DeltaPayload) -> str:
    """Serialize a DeltaPayload as GCF delta text.

    The output is a header line followed by up to four sections, each
    emitted only when it has entries: ``## removed``, ``## added``,
    ``## edges_removed`` and ``## edges_added``.

    Args:
        d: The delta payload to encode.

    Returns:
        GCF delta-formatted text string ending in a newline.
    """
    # Savings are reported as 0% when the full token count is unknown (<= 0).
    savings = 100.0 * (1.0 - d.delta_tokens / d.full_tokens) if d.full_tokens > 0 else 0.0

    parts: list[str] = [
        f"GCF tool={d.tool} delta=true base_root={d.base_root} "
        f"new_root={d.new_root} tokens={d.delta_tokens} savings={savings:.0f}%"
    ]

    # Removed symbols are written as short kind + name references.
    if d.removed:
        parts.append("## removed")
        for s in d.removed:
            kind = KIND_ABBREV.get(s.kind, s.kind)
            parts.append(f"{kind} {s.qualified_name}")

    # Added symbols are written as full declarations, numbered from @0.
    if d.added:
        parts.append("## added")
        for i, s in enumerate(d.added):
            kind = KIND_ABBREV.get(s.kind, s.kind)
            parts.append(f"@{i} {kind} {s.qualified_name} {s.score:.2f} {s.provenance}")

    # Edge sections share one layout; removed edges come before added ones.
    edge_sections = (
        ("## edges_removed", d.removed_edges),
        ("## edges_added", d.added_edges),
    )
    for heading, edges in edge_sections:
        if not edges:
            continue
        parts.append(heading)
        for e in edges:
            parts.append(f"{e.source} -> {e.target} {e.edge_type}")

    return "\n".join(parts) + "\n"
|
gcf/encode.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""GCF encoder: serializes Payload into GCF text format."""
|
|
2
|
+
|
|
3
|
+
from .constants import KIND_ABBREV
|
|
4
|
+
from .types import Payload, Symbol
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def encode(p: Payload) -> str:
    """Encode a Payload into GCF text format.

    The output is a header line, one ``## <group>`` section per run of
    consecutive symbols sharing a distance, and an ``## edges`` section when
    the payload has edges.

    Args:
        p: The payload to encode.

    Returns:
        GCF-formatted text string ending in a newline.
    """
    parts: list[str] = []

    # Header line.
    header = f"GCF tool={p.tool} budget={p.token_budget} tokens={p.tokens_used} symbols={len(p.symbols)}"
    if p.pack_root:
        header += f" pack_root={p.pack_root}"
    parts.append(header)

    # Symbol IDs are positions in p.symbols; if a qualified name repeats,
    # the last position wins.
    sym_index: dict[str, int] = {s.qualified_name: i for i, s in enumerate(p.symbols)}

    group_names = ["targets", "related", "extended"]

    for g_distance, g_symbols in _group_by_distance(p.symbols):
        if not g_symbols:
            continue
        # The lower bound matters: a negative distance would otherwise index
        # group_names from the end and be mislabelled (-1 -> "extended").
        if 0 <= g_distance < len(group_names):
            name = group_names[g_distance]
        else:
            name = f"distance_{g_distance}"
        parts.append(f"## {name}")

        for s in g_symbols:
            idx = sym_index[s.qualified_name]
            kind = KIND_ABBREV.get(s.kind, s.kind)
            parts.append(f"@{idx} {kind} {s.qualified_name} {s.score:.2f} {s.provenance}")

    # Edges section.
    if p.edges:
        parts.append("## edges")
        for e in p.edges:
            src_idx = sym_index.get(e.source)
            tgt_idx = sym_index.get(e.target)
            if src_idx is None or tgt_idx is None:
                # Edges pointing at symbols outside this payload are dropped.
                continue
            line = f"@{tgt_idx}<@{src_idx} {e.edge_type}"
            # "unchanged" is the implicit status and is not written out.
            if e.status and e.status != "unchanged":
                line += f" {e.status}"
            parts.append(line)

    return "\n".join(parts) + "\n"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _group_by_distance(symbols: list[Symbol]) -> list[tuple[int, list[Symbol]]]:
    """Split symbols into runs of consecutive entries with equal distance.

    Input order is preserved and only adjacent symbols are merged, so the
    same distance can appear in more than one group.

    Args:
        symbols: Symbols in payload order.

    Returns:
        ``(distance, symbols)`` pairs in order of first appearance; an empty
        list when ``symbols`` is empty.
    """
    groups: list[tuple[int, list[Symbol]]] = []
    for sym in symbols:
        if groups:
            last_distance, last_run = groups[-1]
            # A None distance never joins a run (it always opens a new one).
            if last_distance is not None and not (last_distance != sym.distance):
                last_run.append(sym)
                continue
        groups.append((sym.distance, [sym]))
    return groups
|
gcf/session.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Session-based deduplication for GCF encoding."""
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
|
|
5
|
+
from .constants import KIND_ABBREV
|
|
6
|
+
from .encode import _group_by_distance
|
|
7
|
+
from .types import Payload, Symbol
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Session:
    """Remember which symbols have already been sent to a client.

    Each recorded qualified name gets a session-global integer ID, assigned
    in recording order starting at 0. Every method holds an internal lock
    while it reads or updates that state, so one instance may be shared by
    handlers running on different threads.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        # qualified_name -> global session ID
        self._symbols: dict[str, int] = {}
        self._next_id: int = 0

    def transmitted(self, qname: str) -> bool:
        """Report whether ``qname`` has been recorded in this session."""
        with self._lock:
            return qname in self._symbols

    def get_id(self, qname: str) -> int:
        """Look up the session-global ID of a recorded symbol.

        Returns -1 when ``qname`` has not been recorded.
        """
        with self._lock:
            return self._symbols.get(qname, -1)

    def record(self, symbols: list[Symbol]) -> None:
        """Register symbols as transmitted, assigning IDs to new names.

        Names that are already recorded keep their existing ID.
        """
        with self._lock:
            for sym in symbols:
                name = sym.qualified_name
                if name in self._symbols:
                    continue
                self._symbols[name] = self._next_id
                self._next_id += 1

    def size(self) -> int:
        """Return how many distinct symbols this session has recorded."""
        with self._lock:
            return len(self._symbols)

    def reset(self) -> None:
        """Forget every recorded symbol and restart ID assignment at 0."""
        with self._lock:
            self._symbols.clear()
            self._next_id = 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def encode_with_session(p: Payload, sess: "Session | None" = None) -> str:
    """Encode a payload with session deduplication.

    Symbols that were already transmitted in prior responses are emitted as
    bare references (`@N # previously transmitted`) instead of full declarations.
    After encoding, newly-sent symbols are recorded in the session.

    Args:
        p: The payload to encode.
        sess: Optional session tracker. If None, encodes without deduplication.

    Returns:
        GCF-formatted text string ending in a newline.
    """
    # The annotation above is a string on purpose: evaluating
    # `Session | None` at definition time raises TypeError on Python 3.9,
    # which the package metadata declares as supported.
    if sess is None:
        from .encode import encode

        return encode(p)

    parts: list[str] = []

    # Header with session=true marker.
    header = (
        f"GCF tool={p.tool} budget={p.token_budget} tokens={p.tokens_used} "
        f"symbols={len(p.symbols)} session=true"
    )
    if p.pack_root:
        header += f" pack_root={p.pack_root}"
    parts.append(header)

    # Local IDs are positions in p.symbols; if a qualified name repeats, the
    # last position wins.
    local_index: dict[str, int] = {s.qualified_name: i for i, s in enumerate(p.symbols)}

    # Symbols declared in full by this response; recorded once encoding ends.
    new_symbols: list[Symbol] = []

    group_names = ["targets", "related", "extended"]

    for g_distance, g_symbols in _group_by_distance(p.symbols):
        if not g_symbols:
            continue
        # The lower bound matters: a negative distance would otherwise index
        # group_names from the end and be mislabelled (-1 -> "extended").
        if 0 <= g_distance < len(group_names):
            name = group_names[g_distance]
        else:
            name = f"distance_{g_distance}"
        parts.append(f"## {name}")

        for s in g_symbols:
            idx = local_index[s.qualified_name]
            if sess.transmitted(s.qualified_name):
                # Bare reference: symbol was sent in a prior response.
                # NOTE(review): idx is the position in this payload, not the
                # session-global ID from Session.get_id - confirm against
                # the GCF specification that this is intended.
                parts.append(f"@{idx} # previously transmitted")
            else:
                # Full declaration.
                kind = KIND_ABBREV.get(s.kind, s.kind)
                parts.append(
                    f"@{idx} {kind} {s.qualified_name} {s.score:.2f} {s.provenance}"
                )
                new_symbols.append(s)

    # Edges section.
    if p.edges:
        parts.append("## edges")
        for e in p.edges:
            src_idx = local_index.get(e.source)
            tgt_idx = local_index.get(e.target)
            if src_idx is None or tgt_idx is None:
                # Edges pointing at symbols outside this payload are dropped.
                continue
            line = f"@{tgt_idx}<@{src_idx} {e.edge_type}"
            # "unchanged" is the implicit status and is not written out.
            if e.status and e.status != "unchanged":
                line += f" {e.status}"
            parts.append(line)

    # Record all new symbols in the session.
    sess.record(new_symbols)

    return "\n".join(parts) + "\n"
|
gcf/types.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Data types for GCF payloads."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class Components:
    """Per-factor score values attached to a symbol.

    All four factors are floats and default to 0.0.
    """

    blast_radius: float = 0.0
    confidence: float = 0.0
    recency: float = 0.0
    distance: float = 0.0
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Symbol:
    """One graph node carried by a GCF payload."""

    # Name used to identify the symbol; edges refer to symbols by this name.
    qualified_name: str = ""
    # Full kind name (e.g. "function"); shortened via KIND_ABBREV on encode.
    kind: str = ""
    # Written to the wire with two decimal places.
    score: float = 0.0
    provenance: str = ""
    # Selects the output group: 0 targets, 1 related, 2 extended, else distance_N.
    distance: int = 0
    signature: str = ""
    components: Components = field(default_factory=Components)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Edge:
    """A directed link between two symbols of a GCF payload."""

    # Qualified name of the symbol the edge starts from.
    source: str = ""
    # Qualified name of the symbol the edge points to.
    target: str = ""
    edge_type: str = ""
    # Left out of encoded output when empty or "unchanged".
    status: str = ""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class Payload:
    """Complete GCF message: header fields plus its symbols and edges.

    This is both what the encoders consume and what the decoder returns.
    """

    tool: str = ""
    tokens_used: int = 0
    token_budget: int = 0
    # Written to the header only when non-empty.
    pack_root: str = ""
    symbols: list[Symbol] = field(default_factory=list)
    edges: list[Edge] = field(default_factory=list)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class DeltaPayload:
    """Difference between an earlier context pack and the current one."""

    tool: str = ""
    # Written to the delta header as base_root= and new_root=.
    base_root: str = ""
    new_root: str = ""
    removed: list[Symbol] = field(default_factory=list)
    added: list[Symbol] = field(default_factory=list)
    removed_edges: list[Edge] = field(default_factory=list)
    added_edges: list[Edge] = field(default_factory=list)
    # The header's savings percentage is computed from these two counts.
    delta_tokens: int = 0
    full_tokens: int = 0
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gcf-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
|
|
5
|
+
Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
|
|
6
|
+
Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
|
|
7
|
+
Project-URL: Specification, https://github.com/blackwell-systems/gcf
|
|
8
|
+
Author: Blackwell Systems
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: gcf,graph,llm,mcp,token-efficient,wire-format
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
<p align="center">
|
|
27
|
+
<a href="https://github.com/blackwell-systems"><img src="https://raw.githubusercontent.com/blackwell-systems/blackwell-docs-theme/main/badge-trademark.svg" alt="Blackwell Systems"></a>
|
|
28
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
|
|
29
|
+
</p>
|
|
30
|
+
|
|
31
|
+
# gcf-python
|
|
32
|
+
|
|
33
|
+
Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwell-systems/gcf).
|
|
34
|
+
|
|
35
|
+
**84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy at 500 symbols, where JSON fails.**
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
pip install gcf-python
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
|
|
44
|
+
|
|
45
|
+
## CLI
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
gcf encode < payload.json # JSON to GCF
|
|
49
|
+
gcf decode < payload.gcf # GCF to JSON
|
|
50
|
+
gcf stats < payload.json # token comparison with visual bar
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
Payload: 50 symbols, 20 edges
|
|
55
|
+
|
|
56
|
+
JSON ██████████████████████████████ 4,200 tokens
|
|
57
|
+
GCF ████████░░░░░░░░░░░░░░░░░░░░░░ 1,150 tokens
|
|
58
|
+
|
|
59
|
+
Savings: 73% fewer tokens with GCF
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Library
|
|
63
|
+
|
|
64
|
+
### Quick Start
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from gcf import encode, Payload, Symbol, Edge
|
|
68
|
+
|
|
69
|
+
p = Payload(
|
|
70
|
+
tool="context_for_task",
|
|
71
|
+
token_budget=5000,
|
|
72
|
+
tokens_used=1847,
|
|
73
|
+
symbols=[
|
|
74
|
+
Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
|
|
75
|
+
Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
|
|
76
|
+
],
|
|
77
|
+
edges=[
|
|
78
|
+
Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
|
|
79
|
+
],
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
output = encode(p)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Output:
|
|
86
|
+
```
|
|
87
|
+
GCF tool=context_for_task budget=5000 tokens=1847 symbols=2
|
|
88
|
+
## targets
|
|
89
|
+
@0 fn pkg.AuthMiddleware 0.78 lsp_resolved
|
|
90
|
+
## related
|
|
91
|
+
@1 fn pkg.NewServer 0.54 lsp_resolved
|
|
92
|
+
## edges
|
|
93
|
+
@0<@1 calls
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Decode
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from gcf import decode
|
|
100
|
+
|
|
101
|
+
p = decode(input_text)
|
|
102
|
+
print(p.tool, len(p.symbols), "symbols", len(p.edges), "edges")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Session Deduplication
|
|
106
|
+
|
|
107
|
+
Track transmitted symbols across multiple tool responses. Previously-sent symbols become bare references instead of full declarations:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from gcf import encode_with_session, Session, Payload, Symbol
|
|
111
|
+
|
|
112
|
+
sess = Session()
|
|
113
|
+
|
|
114
|
+
out1 = encode_with_session(payload1, sess) # full declarations
|
|
115
|
+
out2 = encode_with_session(payload2, sess) # reused symbols as "@N # previously transmitted"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
By the 5th call in a session: 92.7% token savings vs JSON.
|
|
119
|
+
|
|
120
|
+
## Delta Encoding
|
|
121
|
+
|
|
122
|
+
When the consumer already has a prior context pack, send only what changed:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from gcf import encode_delta, DeltaPayload, Symbol, Edge
|
|
126
|
+
|
|
127
|
+
delta = DeltaPayload(
|
|
128
|
+
tool="context_for_task",
|
|
129
|
+
base_root="aaa111",
|
|
130
|
+
new_root="bbb222",
|
|
131
|
+
removed=[Symbol(qualified_name="pkg.OldFunc", kind="function")],
|
|
132
|
+
added=[Symbol(qualified_name="pkg.NewFunc", kind="function", score=0.85, provenance="rwr")],
|
|
133
|
+
delta_tokens=30,
|
|
134
|
+
full_tokens=200,
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
output = encode_delta(delta)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
81.2% savings on re-queries where the pack changed slightly.
|
|
141
|
+
|
|
142
|
+
## API
|
|
143
|
+
|
|
144
|
+
| Function | Description |
|
|
145
|
+
|----------|-------------|
|
|
146
|
+
| `encode(p: Payload) -> str` | Encode a payload to GCF text |
|
|
147
|
+
| `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
|
|
148
|
+
| `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
|
|
149
|
+
| `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
|
|
150
|
+
| `Session()` | Create a new session tracker (thread-safe) |
|
|
151
|
+
|
|
152
|
+
## Types
|
|
153
|
+
|
|
154
|
+
| Type | Purpose |
|
|
155
|
+
|------|---------|
|
|
156
|
+
| `Payload` | Full GCF payload: tool, budget, symbols, edges, pack root |
|
|
157
|
+
| `Symbol` | Graph node: qualified name, kind, score, provenance, distance |
|
|
158
|
+
| `Edge` | Directed relationship: source, target, edge type |
|
|
159
|
+
| `DeltaPayload` | Diff between two packs: added/removed symbols and edges |
|
|
160
|
+
| `Session` | Thread-safe tracker for multi-call deduplication |
|
|
161
|
+
| `KIND_ABBREV` / `KIND_EXPAND` | Bidirectional kind abbreviation dicts |
|
|
162
|
+
|
|
163
|
+
## Comprehension Eval
|
|
164
|
+
|
|
165
|
+
Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
|
|
166
|
+
|
|
167
|
+
| Format | Accuracy | Tokens | vs JSON |
|
|
168
|
+
|--------|----------|--------|---------|
|
|
169
|
+
| **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
|
|
170
|
+
| TOON | 100% (6/6) | 16,378 | 69% fewer |
|
|
171
|
+
| JSON | 66.7% (4/6) | 53,341 | baseline |
|
|
172
|
+
|
|
173
|
+
JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy, but GCF does so in 32% fewer tokens than TOON.
|
|
174
|
+
|
|
175
|
+
## Token Efficiency (TOON's Own Benchmark)
|
|
176
|
+
|
|
177
|
+
Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tree/gcf-comparison) with GCF inserted (their datasets, their tokenizer):
|
|
178
|
+
|
|
179
|
+
| Track | GCF | TOON | Result |
|
|
180
|
+
|-------|-----|------|--------|
|
|
181
|
+
| Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 26% smaller** |
|
|
182
|
+
| Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
|
|
183
|
+
| Semi-uniform event logs | 107,269 | 154,032 | **GCF 30% smaller** |
|
|
184
|
+
|
|
185
|
+
GCF wins on every dataset except deeply nested config, where the gap is 75 tokens on a 618-token payload. On semi-uniform data, GCF uses 30% fewer tokens than TOON (107,269 vs 154,032).
|
|
186
|
+
|
|
187
|
+
Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
|
|
188
|
+
|
|
189
|
+
## Other Implementations
|
|
190
|
+
|
|
191
|
+
- **Go**: [github.com/blackwell-systems/gcf-go](https://github.com/blackwell-systems/gcf-go)
|
|
192
|
+
- **TypeScript**: [github.com/blackwell-systems/gcf-typescript](https://github.com/blackwell-systems/gcf-typescript)
|
|
193
|
+
- **Specification**: [github.com/blackwell-systems/gcf](https://github.com/blackwell-systems/gcf)
|
|
194
|
+
|
|
195
|
+
## License
|
|
196
|
+
|
|
197
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
gcf/__init__.py,sha256=XzxWfa__EBT3GqV6nAC74WX6EpNFO1Qg-YsnCg1ROQQ,1483
|
|
2
|
+
gcf/cli.py,sha256=2hSTBqiYcn1_EgGXuO65MHiEGh0C4DRMvspTd2zUaso,4258
|
|
3
|
+
gcf/constants.py,sha256=cmZ8YJSOB0im_eyfN8v4UvrLpBC6Fuf4cfcKZGbutxY,638
|
|
4
|
+
gcf/decode.py,sha256=kdbYrx0WzozDw-PhPieBv6h_a0B995crCEK-CJoK59c,5162
|
|
5
|
+
gcf/delta.py,sha256=xU0ujtSq1iF7yU8yk_WNQKh8iove-WUV_nKSuvW1XVk,1656
|
|
6
|
+
gcf/encode.py,sha256=KYGxFHy5LJoOF0IQblAm78bLL5uFf5iQMtrnyuuQXCA,2664
|
|
7
|
+
gcf/session.py,sha256=jVfpEK4euCn7apVm-sb0OyycUJrFUPaAUEWCT0d2c14,4472
|
|
8
|
+
gcf/types.py,sha256=yZL2knyFYguh2ex1ZXO1VwD4NEY4jvC-DL6-R-i-x0U,1429
|
|
9
|
+
gcf_python-0.1.0.dist-info/METADATA,sha256=hfE2L2HB1wxrBi8mRYHTD-KiViqVZ-4omjQNdT-ZtNA,6646
|
|
10
|
+
gcf_python-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
11
|
+
gcf_python-0.1.0.dist-info/entry_points.txt,sha256=aFT6gqlkh8iGfM8cblE-LUMxHH08_v71IIoZtDdRIVA,37
|
|
12
|
+
gcf_python-0.1.0.dist-info/licenses/LICENSE,sha256=txSvg3E4LugiB7MOOTci6WKd6wMOrOJTvaITeFJ2SgU,1074
|
|
13
|
+
gcf_python-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Blackwell Systems
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|