gcf-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gcf/__init__.py ADDED
@@ -0,0 +1,60 @@
1
+ """GCF (Graph Compact Format): token-optimized wire format for LLM tool responses.
2
+
3
+ 84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy.
4
+
5
+ Encode a payload:
6
+
7
+ from gcf import encode, Payload, Symbol
8
+
9
+ p = Payload(
10
+ tool="context_for_task",
11
+ token_budget=5000,
12
+ tokens_used=1847,
13
+ symbols=[Symbol(qualified_name="pkg.Func", kind="function", score=0.9, provenance="lsp_resolved")],
14
+ )
15
+ output = encode(p)
16
+
17
+ Decode a payload:
18
+
19
+ from gcf import decode
20
+ p = decode(input_text)
21
+
22
+ Session deduplication:
23
+
24
+ from gcf import encode_with_session, Session
25
+ sess = Session()
26
+ out1 = encode_with_session(payload1, sess) # full declarations
27
+ out2 = encode_with_session(payload2, sess) # reused symbols as bare refs
28
+
29
+ Delta encoding:
30
+
31
+ from gcf import encode_delta, DeltaPayload
32
+ out = encode_delta(DeltaPayload(...))
33
+
34
+ Specification: https://github.com/blackwell-systems/gcf
35
+ """
36
+
37
+ from .constants import KIND_ABBREV, KIND_EXPAND
38
+ from .decode import DecodeError, decode
39
+ from .delta import encode_delta
40
+ from .encode import encode
41
+ from .session import Session, encode_with_session
42
+ from .types import Components, DeltaPayload, Edge, Payload, Symbol
43
+
44
+ __all__ = [
45
+ "Components",
46
+ "DecodeError",
47
+ "DeltaPayload",
48
+ "Edge",
49
+ "KIND_ABBREV",
50
+ "KIND_EXPAND",
51
+ "Payload",
52
+ "Session",
53
+ "Symbol",
54
+ "decode",
55
+ "encode",
56
+ "encode_delta",
57
+ "encode_with_session",
58
+ ]
59
+
60
+ __version__ = "0.1.0"
gcf/cli.py ADDED
@@ -0,0 +1,155 @@
1
+ """GCF command-line interface: encode, decode, stats."""
2
+
3
+ import json
4
+ import sys
5
+
6
+ from .decode import decode
7
+ from .encode import encode
8
+ from .types import Edge, Payload, Symbol
9
+
10
+ USAGE = """gcf - token-optimized wire format for LLM tool responses
11
+
12
+ Usage:
13
+ gcf encode [file] Encode JSON payload to GCF (stdin if no file)
14
+ gcf decode [file] Decode GCF text to JSON (stdin if no file)
15
+ gcf stats [file] Compare token counts: JSON vs GCF (stdin if no file)
16
+ gcf version Print version
17
+
18
+ Examples:
19
+ gcf encode < payload.json
20
+ gcf decode < payload.gcf
21
+ gcf stats payload.json
22
+ """
23
+
24
+
25
def main() -> None:
    """Entry point for the ``gcf`` console script.

    Reads the sub-command from ``sys.argv`` and dispatches to the matching
    handler. With no arguments the usage text is printed and the process
    exits with status 1; an explicit help flag prints it and exits with 0.
    An unrecognised command reports the problem on stderr and exits with 1.
    """
    argv = sys.argv[1:]

    # No arguments at all is treated as a usage error.
    if not argv:
        print(USAGE, end="")
        sys.exit(1)

    command, *rest = argv

    if command in ("-h", "--help", "help"):
        print(USAGE, end="")
        sys.exit(0)

    if command == "version":
        print("gcf 0.1.0")
        return

    # Every remaining sub-command consumes one text input (file or stdin).
    handlers = {
        "encode": _do_encode,
        "decode": _do_decode,
        "stats": _do_stats,
    }
    handler = handlers.get(command)
    if handler is None:
        print(f"unknown command: {command}\n", file=sys.stderr)
        print(USAGE, file=sys.stderr, end="")
        sys.exit(1)

    handler(_read_input(rest))
49
+
50
+
51
def _read_input(args: list[str]) -> str:
    """Return the text to process, taken from a file or from standard input.

    Args:
        args: Arguments that followed the sub-command. If the first one is
            present and is not ``"-"`` it is treated as a file path;
            otherwise standard input is read to the end.

    Returns:
        The full text of the chosen input.

    Raises:
        OSError: If the named file cannot be opened.
        UnicodeDecodeError: If the named file is not valid UTF-8.
    """
    if args and args[0] != "-":
        # Decode explicitly as UTF-8 (the interchange encoding for JSON)
        # instead of the platform locale, so the same file reads identically
        # on every operating system.
        with open(args[0], encoding="utf-8") as f:
            return f.read()
    return sys.stdin.read()
56
+
57
+
58
def _payload_from_json(data: str) -> Payload:
    """Build a Payload from its camelCase JSON representation.

    Args:
        data: JSON text whose top-level value is an object. Recognised keys
            are ``tool``, ``tokenBudget``, ``tokensUsed``, ``packRoot``,
            ``symbols`` and ``edges``; all of them are optional.

    Returns:
        The Payload described by the JSON document.

    Raises:
        json.JSONDecodeError: If ``data`` is not valid JSON.
        KeyError: If a symbol or edge entry lacks a required key.
        TypeError: If the top-level value is not an object, or an entry has
            the wrong shape.
    """
    obj = json.loads(data)
    # A list/number/string at the top level has no .get(); reject it with an
    # exception type the CLI callers already handle instead of letting an
    # AttributeError escape as a traceback.
    if not isinstance(obj, dict):
        raise TypeError(
            f"top-level JSON value must be an object, got {type(obj).__name__}"
        )

    symbols = [
        Symbol(
            qualified_name=s["qualifiedName"],
            kind=s["kind"],
            score=s["score"],
            provenance=s["provenance"],
            distance=s.get("distance", 0),
        )
        for s in obj.get("symbols", [])
    ]
    edges = [
        Edge(
            source=e["source"],
            target=e["target"],
            edge_type=e["edgeType"],
            status=e.get("status", ""),
        )
        for e in obj.get("edges", [])
    ]
    return Payload(
        tool=obj.get("tool", ""),
        token_budget=obj.get("tokenBudget", 0),
        tokens_used=obj.get("tokensUsed", 0),
        pack_root=obj.get("packRoot", ""),
        symbols=symbols,
        edges=edges,
    )
87
+
88
+
89
def _payload_to_json(p: Payload) -> str:
    """Render a Payload as pretty-printed camelCase JSON.

    Args:
        p: The payload to serialise.

    Returns:
        A JSON document indented by two spaces. An edge's ``status`` key is
        included only when the status is non-empty.
    """
    symbol_rows = []
    for sym in p.symbols:
        symbol_rows.append(
            {
                "qualifiedName": sym.qualified_name,
                "kind": sym.kind,
                "score": sym.score,
                "provenance": sym.provenance,
                "distance": sym.distance,
            }
        )

    edge_rows = []
    for edge in p.edges:
        row = {
            "source": edge.source,
            "target": edge.target,
            "edgeType": edge.edge_type,
        }
        if edge.status:
            row["status"] = edge.status
        edge_rows.append(row)

    document = {
        "tool": p.tool,
        "tokensUsed": p.tokens_used,
        "tokenBudget": p.token_budget,
        "packRoot": p.pack_root,
        "symbols": symbol_rows,
        "edges": edge_rows,
    }
    return json.dumps(document, indent=2)
116
+
117
+
118
def _do_encode(data: str) -> None:
    """Handle ``gcf encode``: turn JSON text into GCF and print it.

    Args:
        data: JSON text describing the payload.

    On malformed or incomplete JSON an error line is written to stderr and
    the process exits with status 1.
    """
    try:
        payload = _payload_from_json(data)
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        print(f"error: invalid JSON: {exc}", file=sys.stderr)
        sys.exit(1)
    else:
        # The encoder output already ends with a newline.
        print(encode(payload), end="")
125
+
126
+
127
def _do_decode(data: str) -> None:
    """Handle ``gcf decode``: turn GCF text into JSON and print it.

    Args:
        data: GCF text to decode.

    On invalid GCF an error line is written to stderr and the process exits
    with status 1, mirroring how the encode and stats commands report bad
    input instead of surfacing a traceback.
    """
    # Imported here because the module-level import only brings in `decode`.
    from .decode import DecodeError

    try:
        p = decode(data)
    except DecodeError as e:
        print(f"error: invalid GCF: {e}", file=sys.stderr)
        sys.exit(1)
    print(_payload_to_json(p))
130
+
131
+
132
def _do_stats(data: str) -> None:
    """Handle ``gcf stats``: compare estimated token counts of JSON vs GCF.

    Token counts are estimated as one token per four characters of the
    stripped text. The result is printed as two bars of equal width plus a
    savings percentage.

    Args:
        data: JSON text describing the payload.

    On malformed or incomplete JSON an error line is written to stderr and
    the process exits with status 1.
    """
    try:
        p = _payload_from_json(data)
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        print(f"error: invalid JSON: {e}", file=sys.stderr)
        sys.exit(1)

    gcf_output = encode(p)
    json_tokens = len(data.strip()) // 4
    gcf_tokens = len(gcf_output.strip()) // 4

    savings = 0.0
    if json_tokens > 0:
        savings = 100.0 * (1.0 - gcf_tokens / json_tokens)

    bar_width = 30
    json_bar = "█" * bar_width
    gcf_filled = 0
    if json_tokens > 0:
        # Clamp to the bar width: for tiny inputs the GCF header alone can be
        # longer than the JSON, which would otherwise overflow the bar.
        gcf_filled = min(bar_width, (gcf_tokens * bar_width) // json_tokens)
    gcf_bar = "█" * gcf_filled + "░" * (bar_width - gcf_filled)

    print(f"Payload: {len(p.symbols)} symbols, {len(p.edges)} edges\n")
    print(f"  JSON  {json_bar}  {json_tokens} tokens")
    print(f"  GCF   {gcf_bar}  {gcf_tokens} tokens")
    print(f"\n  Savings: {savings:.0f}% fewer tokens with GCF")
gcf/constants.py ADDED
@@ -0,0 +1,24 @@
1
+ """Kind abbreviation mappings for GCF encoding/decoding."""
2
+
3
# Full symbol-kind name -> token written in GCF text. Kinds that are absent
# from this table are written unchanged by the encoders.
KIND_ABBREV: dict[str, str] = {
    "function": "fn",
    "type": "type",
    "method": "method",
    "interface": "iface",
    "var": "var",
    "const": "const",
    "resource": "resource",
    "table": "table",
    "class": "class",
    "selector": "selector",
    "field": "field",
    "route_handler": "route",
    "external": "ext",
    "file": "file",
    "package": "pkg",
    "service": "svc",
}

# Inverse of KIND_ABBREV: GCF token -> full symbol-kind name.
KIND_EXPAND: dict[str, str] = {
    short: full for full, short in KIND_ABBREV.items()
}
gcf/decode.py ADDED
@@ -0,0 +1,181 @@
1
+ """GCF decoder: parses GCF text back into a Payload."""
2
+
3
+ from .constants import KIND_EXPAND
4
+ from .types import Edge, Payload, Symbol
5
+
6
+
7
class DecodeError(Exception):
    """Signals that a piece of GCF text is malformed and cannot be decoded."""
9
+
10
+
11
def decode(input_text: str) -> Payload:
    """Parse GCF text back into a Payload.

    The first line must be the ``GCF ...`` header. Every following line is
    either blank, a ``## <group>`` section header, a ``# `` comment, a symbol
    declaration, or (inside the ``edges`` section) an edge.

    Args:
        input_text: GCF-formatted text string.

    Returns:
        Parsed Payload.

    Raises:
        DecodeError: If the input is empty or is not valid GCF.
    """
    # str.split never returns an empty list, so emptiness has to be tested on
    # the text itself for the "empty input" diagnosis to be reachable.
    if not input_text.strip():
        raise DecodeError("empty input")

    lines = input_text.split("\n")
    p = Payload()

    # Parse header.
    header = lines[0]
    if not header.startswith("GCF "):
        raise DecodeError(f"invalid header, expected 'GCF ...' got {header!r}")
    _parse_header(header[4:], p)

    # Parse body: symbols and edges.
    named_distances = {"targets": 0, "related": 1, "extended": 2}
    symbols: list[Symbol] = []
    sym_by_id: dict[int, Symbol] = {}
    current_distance = 0
    in_edges = False

    for raw_line in lines[1:]:
        line = raw_line.rstrip("\r")  # tolerate CRLF line endings
        if not line:
            continue

        # Group header: switches between symbol sections and the edge section.
        if line.startswith("## "):
            group = line[3:]
            in_edges = group == "edges"
            if not in_edges:
                if group in named_distances:
                    current_distance = named_distances[group]
                elif group.startswith("distance_"):
                    try:
                        current_distance = int(group[len("distance_"):])
                    except ValueError:
                        # Best effort: an unparsable suffix leaves the
                        # previous distance in force.
                        pass
                # Any other group name also keeps the previous distance.
            continue

        # Comment.
        if line.startswith("# "):
            continue

        if in_edges:
            p.edges.append(_parse_edge_line(line, sym_by_id))
        else:
            sym, sym_id = _parse_symbol_line(line, current_distance)
            symbols.append(sym)
            # Edges refer to symbols by the numeric ID declared on this line.
            sym_by_id[sym_id] = sym

    p.symbols = symbols
    return p
78
+
79
+
80
def _parse_header(fields: str, p: Payload) -> None:
    """Copy the recognised ``key=value`` header fields onto the payload.

    Args:
        fields: Header text following the leading ``GCF `` marker.
        p: Payload to update in place.

    Raises:
        DecodeError: If ``budget`` or ``tokens`` is not an integer.
    """
    for token in fields.split():
        key, separator, value = token.partition("=")
        if not separator:
            # Not a key=value pair; ignore it.
            continue

        if key == "tool":
            p.tool = value
        elif key == "pack_root":
            p.pack_root = value
        elif key == "budget":
            try:
                p.token_budget = int(value)
            except ValueError as e:
                raise DecodeError(f"invalid budget {value!r}: {e}") from e
        elif key == "tokens":
            try:
                p.tokens_used = int(value)
            except ValueError as e:
                raise DecodeError(f"invalid tokens {value!r}: {e}") from e
        # Other keys (e.g. "symbols") are informational and are skipped; the
        # symbol list is rebuilt from the parsed symbol lines.
102
+
103
+
104
def _parse_symbol_line(line: str, distance: int) -> tuple[Symbol, int]:
    """Decode one ``@id kind qualified_name score provenance`` line.

    Args:
        line: The symbol declaration line.
        distance: Distance of the section the line appears in; stored on the
            returned symbol.

    Returns:
        The parsed symbol together with the numeric ID declared on the line.

    Raises:
        DecodeError: If the line does not start with ``@``, has fewer than
            five whitespace-separated fields, or carries a non-numeric ID or
            score.
    """
    if not line.startswith("@"):
        raise DecodeError(f"expected symbol line starting with @, got {line!r}")

    fields = line.split()
    if len(fields) < 5:
        raise DecodeError(
            f"symbol line needs at least 5 fields, got {len(fields)} in {line!r}"
        )
    # Anything after the fifth field is ignored.
    id_field, kind_field, qname, score_field, provenance = fields[:5]

    id_str = id_field[1:]  # drop the leading "@"
    try:
        sym_id = int(id_str)
    except ValueError as e:
        raise DecodeError(f"invalid symbol id {id_str!r}: {e}") from e

    try:
        score = float(score_field)
    except ValueError as e:
        raise DecodeError(f"invalid score {score_field!r}: {e}") from e

    symbol = Symbol(
        qualified_name=qname,
        # Expand a known abbreviation; unknown kinds pass through unchanged.
        kind=KIND_EXPAND.get(kind_field, kind_field),
        score=score,
        provenance=provenance,
        distance=distance,
    )
    return symbol, sym_id
140
+
141
+
142
def _parse_edge_line(line: str, sym_by_id: dict[int, Symbol]) -> Edge:
    """Parse an edge line of the form ``@T<@S edge_type [status]``.

    ``T`` and ``S`` are the IDs of the target and source symbols declared
    earlier in the same document.

    Args:
        line: The edge line.
        sym_by_id: Symbols parsed so far, keyed by their declared ID.

    Returns:
        An Edge whose source and target are the symbols' qualified names.

    Raises:
        DecodeError: If the line has fewer than two fields, the reference is
            not of the form ``@T<@S``, an ID is not an integer, or an ID does
            not match a declared symbol.
    """
    parts = line.split()
    if len(parts) < 2:
        raise DecodeError(f"edge line needs at least 2 fields, got {line!r}")

    ref = parts[0]
    target_part, separator, source_part = ref.partition("<")
    if not separator:
        raise DecodeError(f"edge line missing '<' separator in {ref!r}")

    # Check the "@" sigils before stripping them. Slicing blindly would turn
    # a malformed reference such as "@0<12" into source id 2 without any
    # error.
    if not (target_part.startswith("@") and source_part.startswith("@")):
        raise DecodeError(f"edge reference must have the form '@T<@S', got {ref!r}")

    target_id_str = target_part[1:]
    source_id_str = source_part[1:]

    try:
        target_id = int(target_id_str)
    except ValueError as e:
        raise DecodeError(f"invalid target id {target_id_str!r}: {e}") from e

    try:
        source_id = int(source_id_str)
    except ValueError as e:
        raise DecodeError(f"invalid source id {source_id_str!r}: {e}") from e

    target_sym = sym_by_id.get(target_id)
    source_sym = sym_by_id.get(source_id)
    if target_sym is None or source_sym is None:
        raise DecodeError(
            f"edge references unknown symbol id(s): target={target_id} source={source_id}"
        )

    edge_type = parts[1]
    # The status field is optional.
    status = parts[2] if len(parts) >= 3 else ""

    return Edge(
        source=source_sym.qualified_name,
        target=target_sym.qualified_name,
        edge_type=edge_type,
        status=status,
    )
gcf/delta.py ADDED
@@ -0,0 +1,54 @@
1
+ """GCF delta encoding: only added/removed symbols for incremental delivery."""
2
+
3
+ from .constants import KIND_ABBREV
4
+ from .types import DeltaPayload
5
+
6
+
7
def encode_delta(d: DeltaPayload) -> str:
    """Serialise a DeltaPayload as GCF delta text.

    The output is a header line followed by up to four sections, each
    written only when it has entries: ``## removed``, ``## added``,
    ``## edges_removed`` and ``## edges_added``.

    Args:
        d: The delta payload to encode.

    Returns:
        GCF delta-formatted text string, terminated by a newline.
    """
    # Percentage of tokens saved relative to resending the full pack.
    if d.full_tokens > 0:
        savings = 100.0 * (1.0 - d.delta_tokens / d.full_tokens)
    else:
        savings = 0.0

    lines: list[str] = [
        f"GCF tool={d.tool} delta=true base_root={d.base_root} "
        f"new_root={d.new_root} tokens={d.delta_tokens} savings={savings:.0f}%"
    ]

    # Removed symbols are written as short "kind name" references.
    if d.removed:
        lines.append("## removed")
        lines.extend(
            f"{KIND_ABBREV.get(sym.kind, sym.kind)} {sym.qualified_name}"
            for sym in d.removed
        )

    # Added symbols are written as full declarations with a local "@" ID.
    if d.added:
        lines.append("## added")
        for local_id, sym in enumerate(d.added):
            abbreviated = KIND_ABBREV.get(sym.kind, sym.kind)
            lines.append(
                f"@{local_id} {abbreviated} {sym.qualified_name} {sym.score:.2f} {sym.provenance}"
            )

    # Both edge sections share one line format and use qualified names.
    edge_sections = (
        ("## edges_removed", d.removed_edges),
        ("## edges_added", d.added_edges),
    )
    for title, edges in edge_sections:
        if edges:
            lines.append(title)
            lines.extend(
                f"{edge.source} -> {edge.target} {edge.edge_type}" for edge in edges
            )

    return "\n".join(lines) + "\n"
gcf/encode.py ADDED
@@ -0,0 +1,86 @@
1
+ """GCF encoder: serializes Payload into GCF text format."""
2
+
3
+ from .constants import KIND_ABBREV
4
+ from .types import Payload, Symbol
5
+
6
+
7
def encode(p: Payload) -> str:
    """Encode a Payload into GCF text format.

    The output is a header line, then one ``## <group>`` section per run of
    consecutive symbols sharing a distance, then an ``## edges`` section when
    the payload has edges. Symbols are numbered by their position in
    ``p.symbols`` and edges refer to them by that number.

    Args:
        p: The payload to encode.

    Returns:
        GCF-formatted text string, terminated by a newline.
    """
    parts: list[str] = []

    # Header line.
    header = f"GCF tool={p.tool} budget={p.token_budget} tokens={p.tokens_used} symbols={len(p.symbols)}"
    if p.pack_root:
        header += f" pack_root={p.pack_root}"
    parts.append(header)

    # Symbol index for edge references. If a qualified name occurs more than
    # once, the last position wins.
    sym_index: dict[str, int] = {
        s.qualified_name: i for i, s in enumerate(p.symbols)
    }

    # Distances 0, 1 and 2 have names; anything else is "distance_<n>".
    group_names = ["targets", "related", "extended"]

    for g_distance, g_symbols in _group_by_distance(p.symbols):
        if not g_symbols:
            continue
        # The lower bound matters: without it a negative distance would index
        # group_names from the end and be mislabelled (e.g. -1 -> "extended").
        if 0 <= g_distance < len(group_names):
            name = group_names[g_distance]
        else:
            name = f"distance_{g_distance}"
        parts.append(f"## {name}")

        for s in g_symbols:
            idx = sym_index[s.qualified_name]
            kind = KIND_ABBREV.get(s.kind, s.kind)
            parts.append(f"@{idx} {kind} {s.qualified_name} {s.score:.2f} {s.provenance}")

    # Edges section.
    if p.edges:
        edge_lines: list[str] = []
        for e in p.edges:
            src_idx = sym_index.get(e.source)
            tgt_idx = sym_index.get(e.target)
            # An edge whose endpoint is not among the payload's symbols has
            # no ID to refer to and is left out.
            if src_idx is None or tgt_idx is None:
                continue
            line = f"@{tgt_idx}<@{src_idx} {e.edge_type}"
            # "unchanged" is the implied status, so it is not written.
            if e.status and e.status != "unchanged":
                line += f" {e.status}"
            edge_lines.append(line)
        parts.append("## edges")
        parts.extend(edge_lines)

    return "\n".join(parts) + "\n"
63
+
64
+
65
def _group_by_distance(symbols: list[Symbol]) -> list[tuple[int, list[Symbol]]]:
    """Split symbols into runs of consecutive equal distance.

    Input order is kept and nothing is sorted, so a distance that reappears
    after a different one starts a new run.

    Args:
        symbols: Symbols in payload order.

    Returns:
        ``(distance, symbols_in_run)`` pairs in order of appearance.
    """
    runs: list[tuple[int, list[Symbol]]] = []
    for sym in symbols:
        if runs and runs[-1][0] == sym.distance:
            # Same distance as the run in progress: extend it.
            runs[-1][1].append(sym)
        else:
            runs.append((sym.distance, [sym]))
    return runs
gcf/session.py ADDED
@@ -0,0 +1,137 @@
1
+ """Session-based deduplication for GCF encoding."""
2
+
3
+ import threading
4
+
5
+ from .constants import KIND_ABBREV
6
+ from .encode import _group_by_distance
7
+ from .types import Payload, Symbol
8
+
9
+
10
class Session:
    """Remembers which symbols have already been sent to a client.

    Each recorded qualified name receives a sequential integer ID, starting
    at 0, so later responses can refer to the symbol instead of repeating
    its declaration.

    Every method holds an internal lock while it reads or writes the state,
    so a session may be shared by handlers running on different threads.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        # qualified_name -> session-global ID
        self._symbols: dict[str, int] = {}
        self._next_id: int = 0

    def transmitted(self, qname: str) -> bool:
        """Report whether ``qname`` has been recorded in this session."""
        with self._lock:
            known = qname in self._symbols
        return known

    def get_id(self, qname: str) -> int:
        """Look up the session-global ID assigned to ``qname``.

        Returns -1 if the name has not been recorded.
        """
        with self._lock:
            try:
                return self._symbols[qname]
            except KeyError:
                return -1

    def record(self, symbols: list[Symbol]) -> None:
        """Register symbols as transmitted, assigning IDs to unseen names.

        Names that are already known keep their existing ID. Call this after
        a successful encode to register the newly sent symbols.
        """
        with self._lock:
            for sym in symbols:
                name = sym.qualified_name
                if name in self._symbols:
                    continue
                self._symbols[name] = self._next_id
                self._next_id += 1

    def size(self) -> int:
        """Return how many distinct symbols this session is tracking."""
        with self._lock:
            count = len(self._symbols)
        return count

    def reset(self) -> None:
        """Forget every recorded symbol and restart ID numbering at 0."""
        with self._lock:
            self._next_id = 0
            self._symbols.clear()
56
+
57
+
58
def encode_with_session(p: Payload, sess: "Session | None" = None) -> str:
    """Encode a payload with session deduplication.

    Symbols that were already transmitted in prior responses are emitted as
    bare references (`@N # previously transmitted`) instead of full declarations.
    After encoding, newly-sent symbols are recorded in the session.

    Args:
        p: The payload to encode.
        sess: Optional session tracker. If None, encodes without deduplication.

    Returns:
        GCF-formatted text string, terminated by a newline.
    """
    # The `sess` annotation is quoted on purpose: an unquoted `X | None` in a
    # signature is evaluated when the function is defined and raises
    # TypeError on Python 3.9, which would break importing the package there.
    if sess is None:
        from .encode import encode
        return encode(p)

    parts: list[str] = []

    # Header with session=true marker.
    header = (
        f"GCF tool={p.tool} budget={p.token_budget} tokens={p.tokens_used} "
        f"symbols={len(p.symbols)} session=true"
    )
    if p.pack_root:
        header += f" pack_root={p.pack_root}"
    parts.append(header)

    # Local ID mapping for this response: position within p.symbols.
    # NOTE(review): bare references below carry this local index rather than
    # the ID kept by Session; confirm against the GCF specification that
    # consumers can resolve them.
    local_index: dict[str, int] = {
        s.qualified_name: i for i, s in enumerate(p.symbols)
    }

    # Symbols declared in full by this response; recorded at the end.
    new_symbols: list[Symbol] = []

    # Distances 0, 1 and 2 have names; anything else is "distance_<n>".
    group_names = ["targets", "related", "extended"]

    for g_distance, g_symbols in _group_by_distance(p.symbols):
        if not g_symbols:
            continue
        # The lower bound matters: without it a negative distance would index
        # group_names from the end and be mislabelled (e.g. -1 -> "extended").
        if 0 <= g_distance < len(group_names):
            name = group_names[g_distance]
        else:
            name = f"distance_{g_distance}"
        parts.append(f"## {name}")

        for s in g_symbols:
            idx = local_index[s.qualified_name]
            if sess.transmitted(s.qualified_name):
                # Bare reference: symbol was sent in a prior response.
                parts.append(f"@{idx} # previously transmitted")
            else:
                # Full declaration.
                kind = KIND_ABBREV.get(s.kind, s.kind)
                parts.append(
                    f"@{idx} {kind} {s.qualified_name} {s.score:.2f} {s.provenance}"
                )
                new_symbols.append(s)

    # Edges section.
    if p.edges:
        parts.append("## edges")
        for e in p.edges:
            src_idx = local_index.get(e.source)
            tgt_idx = local_index.get(e.target)
            # An edge whose endpoint is not among the payload's symbols has
            # no ID to refer to and is left out.
            if src_idx is None or tgt_idx is None:
                continue
            line = f"@{tgt_idx}<@{src_idx} {e.edge_type}"
            # "unchanged" is the implied status, so it is not written.
            if e.status and e.status != "unchanged":
                line += f" {e.status}"
            parts.append(line)

    # Record all new symbols in the session.
    sess.record(new_symbols)

    return "\n".join(parts) + "\n"
gcf/types.py ADDED
@@ -0,0 +1,63 @@
1
+ """Data types for GCF payloads."""
2
+
3
+ from dataclasses import dataclass, field
4
+
5
+
6
@dataclass
class Components:
    """Score breakdown for a symbol.

    Every component is a float that defaults to 0.0.
    """

    blast_radius: float = 0.0
    confidence: float = 0.0
    recency: float = 0.0
    distance: float = 0.0
14
+
15
+
16
@dataclass
class Symbol:
    """A node in a GCF payload.

    The encoders write ``kind``, ``qualified_name``, ``score`` and
    ``provenance`` on each symbol line; ``distance`` selects the section the
    line is placed in.
    """

    # Identifier that edges use to refer to this symbol.
    qualified_name: str = ""
    # Full kind name; abbreviated through KIND_ABBREV when encoded.
    kind: str = ""
    # Written with two decimal places when encoded.
    score: float = 0.0
    provenance: str = ""
    # 0, 1 and 2 map to the "targets", "related" and "extended" sections.
    distance: int = 0
    signature: str = ""
    # A fresh Components instance per symbol (never shared between symbols).
    components: Components = field(default_factory=Components)
27
+
28
+
29
@dataclass
class Edge:
    """A directed relationship in a GCF payload."""

    # Qualified name of the symbol the edge starts from.
    source: str = ""
    # Qualified name of the symbol the edge points to.
    target: str = ""
    edge_type: str = ""
    # Optional; encode() leaves it out when it is empty or "unchanged".
    status: str = ""
37
+
38
+
39
@dataclass
class Payload:
    """Input/output structure for GCF encoding/decoding."""

    # Written to the header as "tool=".
    tool: str = ""
    # Written to the header as "tokens=".
    tokens_used: int = 0
    # Written to the header as "budget=".
    token_budget: int = 0
    # Written to the header as "pack_root=" only when non-empty.
    pack_root: str = ""
    # Each instance gets its own empty list by default.
    symbols: list[Symbol] = field(default_factory=list)
    edges: list[Edge] = field(default_factory=list)
49
+
50
+
51
@dataclass
class DeltaPayload:
    """Diff between a prior context pack and the current result."""

    tool: str = ""
    # Written to the delta header as "base_root=" and "new_root=".
    base_root: str = ""
    new_root: str = ""
    # Symbols dropped since the prior pack / symbols new in this one.
    removed: list[Symbol] = field(default_factory=list)
    added: list[Symbol] = field(default_factory=list)
    # Edges dropped since the prior pack / edges new in this one.
    removed_edges: list[Edge] = field(default_factory=list)
    added_edges: list[Edge] = field(default_factory=list)
    # encode_delta() reports savings as 1 - delta_tokens / full_tokens,
    # or 0 when full_tokens is not positive.
    delta_tokens: int = 0
    full_tokens: int = 0
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: gcf-python
3
+ Version: 0.1.0
4
+ Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
5
+ Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
6
+ Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
7
+ Project-URL: Specification, https://github.com/blackwell-systems/gcf
8
+ Author: Blackwell Systems
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: gcf,graph,llm,mcp,token-efficient,wire-format
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Libraries
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+
26
+ <p align="center">
27
+ <a href="https://github.com/blackwell-systems"><img src="https://raw.githubusercontent.com/blackwell-systems/blackwell-docs-theme/main/badge-trademark.svg" alt="Blackwell Systems"></a>
28
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
29
+ </p>
30
+
31
+ # gcf-python
32
+
33
+ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwell-systems/gcf).
34
+
35
+ **84% fewer tokens than JSON. 32% fewer than TOON. 100% LLM comprehension accuracy at 500 symbols, where JSON fails.**
36
+
37
+ ## Install
38
+
39
+ ```
40
+ pip install gcf-python
41
+ ```
42
+
43
+ Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
44
+
45
+ ## CLI
46
+
47
+ ```bash
48
+ gcf encode < payload.json # JSON to GCF
49
+ gcf decode < payload.gcf # GCF to JSON
50
+ gcf stats < payload.json # token comparison with visual bar
51
+ ```
52
+
53
+ ```
54
+ Payload: 50 symbols, 20 edges
55
+
56
+ JSON ██████████████████████████████ 4,200 tokens
57
+ GCF ████████░░░░░░░░░░░░░░░░░░░░░░ 1,150 tokens
58
+
59
+ Savings: 73% fewer tokens with GCF
60
+ ```
61
+
62
+ ## Library
63
+
64
+ ### Quick Start
65
+
66
+ ```python
67
+ from gcf import encode, Payload, Symbol, Edge
68
+
69
+ p = Payload(
70
+ tool="context_for_task",
71
+ token_budget=5000,
72
+ tokens_used=1847,
73
+ symbols=[
74
+ Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
75
+ Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
76
+ ],
77
+ edges=[
78
+ Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
79
+ ],
80
+ )
81
+
82
+ output = encode(p)
83
+ ```
84
+
85
+ Output:
86
+ ```
87
+ GCF tool=context_for_task budget=5000 tokens=1847 symbols=2
88
+ ## targets
89
+ @0 fn pkg.AuthMiddleware 0.78 lsp_resolved
90
+ ## related
91
+ @1 fn pkg.NewServer 0.54 lsp_resolved
92
+ ## edges
93
+ @0<@1 calls
94
+ ```
95
+
96
+ ## Decode
97
+
98
+ ```python
99
+ from gcf import decode
100
+
101
+ p = decode(input_text)
102
+ print(p.tool, len(p.symbols), "symbols", len(p.edges), "edges")
103
+ ```
104
+
105
+ ## Session Deduplication
106
+
107
+ Track transmitted symbols across multiple tool responses. Previously-sent symbols become bare references instead of full declarations:
108
+
109
+ ```python
110
+ from gcf import encode_with_session, Session, Payload, Symbol
111
+
112
+ sess = Session()
113
+
114
+ out1 = encode_with_session(payload1, sess) # full declarations
115
+ out2 = encode_with_session(payload2, sess) # reused symbols as "@N # previously transmitted"
116
+ ```
117
+
118
+ By the 5th call in a session: 92.7% token savings vs JSON.
119
+
120
+ ## Delta Encoding
121
+
122
+ When the consumer already has a prior context pack, send only what changed:
123
+
124
+ ```python
125
+ from gcf import encode_delta, DeltaPayload, Symbol, Edge
126
+
127
+ delta = DeltaPayload(
128
+ tool="context_for_task",
129
+ base_root="aaa111",
130
+ new_root="bbb222",
131
+ removed=[Symbol(qualified_name="pkg.OldFunc", kind="function")],
132
+ added=[Symbol(qualified_name="pkg.NewFunc", kind="function", score=0.85, provenance="rwr")],
133
+ delta_tokens=30,
134
+ full_tokens=200,
135
+ )
136
+
137
+ output = encode_delta(delta)
138
+ ```
139
+
140
+ 81.2% savings on re-queries where the pack changed slightly.
141
+
142
+ ## API
143
+
144
+ | Function | Description |
145
+ |----------|-------------|
146
+ | `encode(p: Payload) -> str` | Encode a payload to GCF text |
147
+ | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
148
+ | `encode_with_session(p: Payload, sess: Optional[Session] = None) -> str` | Encode with session deduplication (plain `encode` when `sess` is omitted) |
149
+ | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
150
+ | `Session()` | Create a new session tracker (thread-safe) |
151
+
152
+ ## Types
153
+
154
+ | Type | Purpose |
155
+ |------|---------|
156
+ | `Payload` | Full GCF payload: tool, budget, symbols, edges, pack root |
157
+ | `Symbol` | Graph node: qualified name, kind, score, provenance, distance |
158
+ | `Edge` | Directed relationship: source, target, edge type, optional status |
159
+ | `DeltaPayload` | Diff between two packs: added/removed symbols and edges |
160
+ | `Session` | Thread-safe tracker for multi-call deduplication |
161
+ | `KIND_ABBREV` / `KIND_EXPAND` | Bidirectional kind abbreviation dicts |
162
+
163
+ ## Comprehension Eval
164
+
165
+ Rigorous 3-way benchmark (GCF vs TOON vs JSON) at 500 symbols, 200 edges. Six structured extraction questions sent to an LLM:
166
+
167
+ | Format | Accuracy | Tokens | vs JSON |
168
+ |--------|----------|--------|---------|
169
+ | **GCF** | **100%** (6/6) | **11,090** | **79% fewer** |
170
+ | TOON | 100% (6/6) | 16,378 | 69% fewer |
171
+ | JSON | 66.7% (4/6) | 53,341 | baseline |
172
+
173
+ JSON failed on counting tasks. GCF and TOON both achieved perfect accuracy. GCF does it in 32% fewer tokens.
174
+
175
+ ## Token Efficiency (TOON's Own Benchmark)
176
+
177
+ Running [TOON's benchmark harness](https://github.com/blackwell-systems/toon/tree/gcf-comparison) with GCF inserted (their datasets, their tokenizer):
178
+
179
+ | Track | GCF | TOON | Result |
180
+ |-------|-----|------|--------|
181
+ | Mixed-structure (nested, semi-uniform) | 169,554 | 227,896 | **GCF 34% smaller** |
182
+ | Flat-only (tabular) | 66,026 | 67,837 | **GCF 3% smaller** |
183
+ | Semi-uniform event logs | 107,269 | 154,032 | **GCF 44% smaller** |
184
+
185
+ GCF wins on every dataset except deeply nested config (75 tokens on a 618-token payload). On semi-uniform data, GCF uses 44% fewer tokens than TOON.
186
+
187
+ Reproducible: [blackwell-systems/toon@gcf-comparison](https://github.com/blackwell-systems/toon/tree/gcf-comparison)
188
+
189
+ ## Other Implementations
190
+
191
+ - **Go**: [github.com/blackwell-systems/gcf-go](https://github.com/blackwell-systems/gcf-go)
192
+ - **TypeScript**: [github.com/blackwell-systems/gcf-typescript](https://github.com/blackwell-systems/gcf-typescript)
193
+ - **Specification**: [github.com/blackwell-systems/gcf](https://github.com/blackwell-systems/gcf)
194
+
195
+ ## License
196
+
197
+ MIT
@@ -0,0 +1,13 @@
1
+ gcf/__init__.py,sha256=XzxWfa__EBT3GqV6nAC74WX6EpNFO1Qg-YsnCg1ROQQ,1483
2
+ gcf/cli.py,sha256=2hSTBqiYcn1_EgGXuO65MHiEGh0C4DRMvspTd2zUaso,4258
3
+ gcf/constants.py,sha256=cmZ8YJSOB0im_eyfN8v4UvrLpBC6Fuf4cfcKZGbutxY,638
4
+ gcf/decode.py,sha256=kdbYrx0WzozDw-PhPieBv6h_a0B995crCEK-CJoK59c,5162
5
+ gcf/delta.py,sha256=xU0ujtSq1iF7yU8yk_WNQKh8iove-WUV_nKSuvW1XVk,1656
6
+ gcf/encode.py,sha256=KYGxFHy5LJoOF0IQblAm78bLL5uFf5iQMtrnyuuQXCA,2664
7
+ gcf/session.py,sha256=jVfpEK4euCn7apVm-sb0OyycUJrFUPaAUEWCT0d2c14,4472
8
+ gcf/types.py,sha256=yZL2knyFYguh2ex1ZXO1VwD4NEY4jvC-DL6-R-i-x0U,1429
9
+ gcf_python-0.1.0.dist-info/METADATA,sha256=hfE2L2HB1wxrBi8mRYHTD-KiViqVZ-4omjQNdT-ZtNA,6646
10
+ gcf_python-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ gcf_python-0.1.0.dist-info/entry_points.txt,sha256=aFT6gqlkh8iGfM8cblE-LUMxHH08_v71IIoZtDdRIVA,37
12
+ gcf_python-0.1.0.dist-info/licenses/LICENSE,sha256=txSvg3E4LugiB7MOOTci6WKd6wMOrOJTvaITeFJ2SgU,1074
13
+ gcf_python-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gcf = gcf.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Blackwell Systems
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.