gcf-python 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gcf/__init__.py CHANGED
@@ -40,13 +40,16 @@ from .delta import encode_delta
40
40
  from .encode import encode
41
41
  from .generic import encode_generic
42
42
  from .session import Session, encode_with_session
43
+ from .decode_generic import decode_generic
43
44
  from .stream import StreamEncoder
45
+ from .stream_generic import GenericStreamEncoder
44
46
  from .types import Components, DeltaPayload, Edge, Payload, Symbol
45
47
 
46
48
  __all__ = [
47
49
  "Components",
48
50
  "DecodeError",
49
51
  "DeltaPayload",
52
+ "GenericStreamEncoder",
50
53
  "Edge",
51
54
  "KIND_ABBREV",
52
55
  "KIND_EXPAND",
@@ -55,6 +58,7 @@ __all__ = [
55
58
  "StreamEncoder",
56
59
  "Symbol",
57
60
  "decode",
61
+ "decode_generic",
58
62
  "encode",
59
63
  "encode_delta",
60
64
  "encode_generic",
gcf/decode_generic.py ADDED
@@ -0,0 +1,255 @@
1
+ """GCF generic decoder: parses any GCF text (tabular or graph) back to Python objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from .decode import decode
8
+
9
+
10
def decode_generic(input_text: str) -> Any:
    """Turn GCF text of either profile back into plain Python data.

    Trailing newlines and carriage returns are removed first; if nothing is
    left the result is ``None``.  Text whose first line starts with ``"GCF "``
    is handed to ``decode`` and the resulting payload is flattened into a dict
    with camelCase keys (``tool``, ``tokenBudget``, ``tokensUsed``,
    ``packRoot``, ``symbols``, ``edges``).  Any other text is parsed as a
    generic object (key=value pairs, sections, tabular and inline arrays) and
    returned as a dict.
    """
    text = input_text.rstrip("\n\r")
    if text == "":
        return None

    rows = text.split("\n")

    if not rows[0].startswith("GCF "):
        # Generic profile: the whole document is one top-level object.
        parsed: dict[str, Any] = {}
        _parse_object(rows, 0, 0, parsed)
        return parsed

    # Graph profile: reuse the dedicated decoder and re-shape its payload.
    payload = decode(text)
    decoded: dict[str, Any] = {
        "tool": payload.tool,
        "tokenBudget": payload.token_budget,
        "tokensUsed": payload.tokens_used,
        "packRoot": payload.pack_root,
    }

    symbols = []
    for sym in payload.symbols:
        symbols.append(
            {
                "qualifiedName": sym.qualified_name,
                "kind": sym.kind,
                "score": sym.score,
                "provenance": sym.provenance,
                "distance": sym.distance,
            }
        )
    decoded["symbols"] = symbols

    edges = []
    for edge in payload.edges:
        entry = {
            "source": edge.source,
            "target": edge.target,
            "edgeType": edge.edge_type,
        }
        # "status" is only present when the edge carries a truthy status.
        if edge.status:
            entry["status"] = edge.status
        edges.append(entry)
    decoded["edges"] = edges

    return decoded
56
+
57
+
58
def _parse_object(lines: list[str], start: int, depth: int, out: dict[str, Any]) -> int:
    """Parse the lines of one object at nesting level *depth* into *out*.

    Parsing starts at ``lines[start]`` and stops at the end of input or, for
    ``depth > 0``, at the first non-blank line that does not carry this
    level's indent.  Recognised line shapes:

    * ``## _summary ...``        -- skipped
    * ``## name [N]{f1,f2}``     -- tabular array, rows parsed at this depth
    * ``## name [N]``            -- non-uniform array (``[0]`` gives ``[]``)
    * ``## name``                -- nested object parsed at ``depth + 1``
    * ``name[N]: v1,v2``         -- inline primitive array
    * ``key=value``              -- scalar entry

    Blank lines, ``# `` comment lines and unrecognised lines are skipped.

    Args:
        lines: All lines of the document.
        start: Index of the first line to examine.
        depth: Nesting level; the expected indent is ``" " * depth``.
        out: Dict that receives the parsed entries (mutated in place).

    Returns:
        The number of lines consumed, counted from *start*.
    """
    indent = " " * depth
    i = start

    while i < len(lines):
        raw = lines[i].rstrip("\r")
        if raw == "" or raw.startswith("# "):
            i += 1
            continue

        # A line without this level's indent belongs to an enclosing object.
        if depth > 0 and not raw.startswith(indent):
            break

        content = raw[len(indent):] if depth > 0 else raw

        # Comments indented to this level are skipped as well; otherwise a
        # comment such as "# a=b" would be stored as a key=value entry.
        if content.startswith("# "):
            i += 1
            continue

        if content.startswith("## _summary"):
            i += 1
            continue

        if content.startswith("## "):
            header = content[3:]
            bracket_idx = header.find(" [")

            if bracket_idx >= 0:
                name = header[:bracket_idx]
                rest = header[bracket_idx + 2:]
                close_bracket = rest.find("]")

                if close_bracket >= 0:
                    after_bracket = rest[close_bracket + 1:]

                    if after_bracket.startswith("{"):
                        field_end = after_bracket.find("}")
                        if field_end >= 0:
                            # Tabular array: the declared count is ignored,
                            # rows are read until the section ends.
                            fields = after_bracket[1:field_end].split(",")
                            i += 1
                            rows, consumed = _parse_tabular_rows(lines, i, depth, fields)
                            out[name] = rows
                            i += consumed
                            continue
                    else:
                        count_str = rest[:close_bracket]
                        if count_str == "0":
                            out[name] = []
                            i += 1
                            continue
                        i += 1
                        items, consumed = _parse_non_uniform_array(lines, i, depth)
                        out[name] = items
                        i += consumed
                        continue

            # Plain section (or a malformed bracket header): nested object.
            name = header
            bi = name.find(" [")
            if bi >= 0:
                name = name[:bi]
            i += 1
            nested: dict[str, Any] = {}
            consumed = _parse_object(lines, i, depth + 1, nested)
            out[name] = nested
            i += consumed
            continue

        bracket_idx = content.find("[")
        eq_idx = content.find("=")

        # Inline primitive array.  An "=" ahead of the first "[" means the
        # bracket sits inside a key=value value, so that line is left to the
        # key=value branch below.
        if bracket_idx > 0 and not 0 < eq_idx < bracket_idx:
            colon_idx = content.find("]: ")
            vals_str = None
            if colon_idx > bracket_idx:
                vals_str = content[colon_idx + 3:]
            elif colon_idx < 0 and content.endswith("]:"):
                # Empty array whose trailing space was stripped.
                vals_str = ""
            if vals_str is not None:
                name = content[:bracket_idx]
                if vals_str.strip():
                    out[name] = [_parse_value(v.strip()) for v in vals_str.split(",")]
                else:
                    # "".split(",") would yield [""], not an empty list.
                    out[name] = []
                i += 1
                continue

        # Key=value.
        if eq_idx > 0:
            key = content[:eq_idx]
            val = content[eq_idx + 1:]
            out[key] = _parse_value(val)
            i += 1
            continue

        # Unrecognised line: skip it.
        i += 1

    return i - start
144
+
145
+
146
def _parse_tabular_rows(
    lines: list[str], start: int, depth: int, fields: list[str]
) -> tuple[list[Any], int]:
    """Parse the pipe-separated rows that follow a tabular section header.

    Rows are read from ``lines[start]`` until the end of input, a ``## ``
    header, or (for ``depth > 0``) a line lacking this level's indent.  Blank
    lines and ``# `` comments are skipped.  Each row becomes a dict keyed by
    *fields*; cells missing at the end of a row are ``None`` and surplus
    cells are ignored.  Cells are split with ``_split_row`` so a ``|`` inside
    a double-quoted cell does not start a new cell.

    A row written as ``@id cells...`` has its ``@id `` prefix removed and may
    be followed by ``.field`` lines indented one level deeper; each such line
    introduces a nested object (parsed at ``depth + 2``) stored under that
    field name.

    Args:
        lines: All lines of the document.
        start: Index of the first candidate row.
        depth: Nesting level of the section; indent is ``" " * depth``.
        fields: Column names taken from the section header.

    Returns:
        ``(rows, consumed)`` where *consumed* is the number of lines read.
    """
    indent = " " * depth
    rows: list[Any] = []
    i = start

    while i < len(lines):
        raw = lines[i].rstrip("\r")
        if raw == "":
            i += 1
            continue

        if depth > 0 and not raw.startswith(indent):
            break
        content = raw[len(indent):] if depth > 0 else raw

        if content.startswith("## "):
            break
        if content.startswith("# "):
            i += 1
            continue

        row_data = content
        has_nested = False
        if row_data.startswith("@"):
            sp = row_data.find(" ")
            if sp > 0:
                row_data = row_data[sp + 1:]
                has_nested = True

        vals = _split_row(row_data)
        row: dict[str, Any] = {}
        for j, f in enumerate(fields):
            row[f] = _parse_value(vals[j]) if j < len(vals) else None

        i += 1

        if has_nested:
            nested_indent = indent + " "
            while i < len(lines):
                nl = lines[i].rstrip("\r")
                if not nl.startswith(nested_indent):
                    break
                nc = nl[len(nested_indent):]

                if nc.startswith("."):
                    field_name = nc[1:]
                    i += 1
                    nested: dict[str, Any] = {}
                    consumed = _parse_object(lines, i, depth + 2, nested)
                    row[field_name] = nested
                    i += consumed
                else:
                    break

        rows.append(row)

    return rows, i - start


def _split_row(row_data: str) -> list[str]:
    """Split a row on ``|`` while keeping double-quoted cells intact.

    A cell is treated as quoted only when ``"`` is its first character.
    Inside a quoted cell a backslash protects the following character, so
    ``\\"`` does not close the quote.  Quotes and escapes are left in the
    returned cells for the value parser to interpret.  If a quote is never
    closed the row is split on every ``|`` instead.
    """
    cells: list[str] = []
    buf: list[str] = []
    in_quotes = False
    at_cell_start = True
    pos = 0
    size = len(row_data)

    while pos < size:
        ch = row_data[pos]
        if in_quotes:
            if ch == "\\" and pos + 1 < size:
                # Keep the escape pair together; it cannot end the quote.
                buf.append(ch)
                buf.append(row_data[pos + 1])
                pos += 2
                continue
            if ch == '"':
                in_quotes = False
            buf.append(ch)
        elif ch == "|":
            cells.append("".join(buf))
            buf = []
            at_cell_start = True
            pos += 1
            continue
        else:
            if ch == '"' and at_cell_start:
                in_quotes = True
            buf.append(ch)
        at_cell_start = False
        pos += 1

    if in_quotes:
        # Unterminated quote: treat the quote character as literal text.
        return row_data.split("|")

    cells.append("".join(buf))
    return cells
205
+
206
+
207
def _parse_non_uniform_array(
    lines: list[str], start: int, depth: int
) -> tuple[list[Any], int]:
    """Collect the ``@id value`` items that follow a non-tabular array header.

    Reading begins at ``lines[start]`` and ends at the end of input, at a
    ``## `` header, at any line that does not begin with ``@``, or (when
    ``depth > 0``) at a line missing this level's indent.  Blank lines are
    skipped.  For each ``@`` line the text after the first space is parsed as
    a value; an ``@`` line with no space is consumed without adding an item.

    Returns ``(items, consumed)`` where *consumed* counts lines read from
    *start*.
    """
    prefix = " " * depth
    collected: list[Any] = []
    pos = start

    while pos < len(lines):
        line = lines[pos].rstrip("\r")
        if not line:
            pos += 1
            continue

        if depth > 0:
            if not line.startswith(prefix):
                break
            line = line[len(prefix):]

        # Anything other than an "@" item (section headers included) ends the array.
        if line.startswith("## ") or not line.startswith("@"):
            break

        space_at = line.find(" ")
        if space_at > 0:
            collected.append(_parse_value(line[space_at + 1:]))
        pos += 1

    return collected, pos - start
234
+
235
+
236
def _parse_value(s: str) -> Any:
    """Convert one scalar token into a Python value.

    * ``-`` gives ``None``; ``true`` / ``false`` give booleans.
    * A token wrapped in double quotes gives the enclosed string with
      ``\\"`` and ``\\\\`` unescaped (``""`` gives the empty string).
    * A token that ``int()`` or ``float()`` accepts gives that number.
    * Anything else is returned unchanged as a string.

    Numeric conversion is only attempted for ASCII tokens without
    underscores or surrounding whitespace.  ``int()`` and ``float()`` accept
    those forms too (``"1_000"``, ``" 12"``, non-ASCII digits), which would
    silently turn such strings into numbers.  Spellings like ``nan`` and
    ``inf`` that ``float()`` accepts still decode to floats.
    """
    if s == "-":
        return None
    if s == "true":
        return True
    if s == "false":
        return False
    if len(s) >= 2 and s[0] == '"' and s[-1] == '"':
        return s[1:-1].replace('\\"', '"').replace("\\\\", "\\")

    # Keep strings that merely resemble Python numeric literals as strings.
    if "_" in s or s != s.strip() or not s.isascii():
        return s

    try:
        return int(s)
    except ValueError:
        pass
    try:
        return float(s)
    except ValueError:
        pass
    return s
gcf/stream_generic.py ADDED
@@ -0,0 +1,111 @@
1
+ """GCF generic streaming encoder: zero-buffering tabular encode to any writable."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ from typing import Any, Sequence
7
+
8
+
9
class GenericStreamEncoder:
    """Writes GCF tabular output incrementally as rows arrive.

    Zero buffering: each call writes its line to the underlying writer
    immediately.  Array headers carry a deferred count (``[?]``); the real
    counts are reported by the ``## _summary`` trailer that ``close()``
    emits.  Every public method runs under one internal lock.

    Example::

        enc = GenericStreamEncoder(sys.stdout)
        enc.begin_array("employees", ["id", "name", "department", "salary"])
        enc.write_row([1, "Alice", "Engineering", 95000])
        enc.write_row([2, "Bob", "Sales", 72000])
        enc.end_array()
        enc.close()
    """

    def __init__(self, writer: Any) -> None:
        """Wrap *writer*, any object exposing ``write(str)``."""
        self._w = writer
        self._lock = threading.Lock()
        # (name, row count) of every array finished since the last summary.
        self._sections: list[tuple[str, int]] = []
        # State of the array currently being written, or None.
        self._current: dict[str, Any] | None = None

    def begin_array(self, name: str, fields: Sequence[str]) -> None:
        """Start a tabular array section with deferred count [?].

        An array that is still open is ended first.
        """
        with self._lock:
            if self._current is not None:
                self._end_array_locked()
            self._w.write(f"## {name} [?]{{{','.join(fields)}}}\n")
            self._current = {"name": name, "fields": list(fields), "count": 0}

    def write_row(self, values: Sequence[Any]) -> None:
        """Emit a single pipe-separated row immediately.

        Rows written while no array is open are dropped.
        """
        with self._lock:
            if self._current is None:
                return
            parts = [_format_value(v) for v in values]
            self._w.write("|".join(parts) + "\n")
            self._current["count"] += 1

    def end_array(self) -> None:
        """Close the current array section and record its count."""
        with self._lock:
            self._end_array_locked()

    def write_kv(self, key: str, value: Any) -> None:
        """Emit a key=value line immediately."""
        with self._lock:
            self._w.write(f"{key}={_format_value(value)}\n")

    def write_section(self, name: str) -> None:
        """Start a nested object section (## key), ending any open array."""
        with self._lock:
            if self._current is not None:
                self._end_array_locked()
            self._w.write(f"## {name}\n")

    def write_inline_array(self, name: str, values: Sequence[Any]) -> None:
        """Emit a primitive array inline: name[N]: val1,val2,val3"""
        with self._lock:
            parts = [_format_value(v) for v in values]
            self._w.write(f"{name}[{len(values)}]: {','.join(parts)}\n")

    def close(self) -> None:
        """Emit the ## _summary trailer with final counts.

        An open array is ended first.  Nothing is written when no array has
        been recorded.  Reported sections are forgotten afterwards, so a
        repeated ``close()`` does not write a duplicate trailer.
        """
        with self._lock:
            if self._current is not None:
                self._end_array_locked()
            if not self._sections:
                return
            total_rows = sum(count for _, count in self._sections)
            section_parts = [f"{name}:{count}" for name, count in self._sections]
            self._w.write(
                f"## _summary rows={total_rows} sections={','.join(section_parts)}\n"
            )
            self._sections.clear()

    def _end_array_locked(self) -> None:
        """Record the open array's name and count; caller holds the lock."""
        if self._current is None:
            return
        self._sections.append((self._current["name"], self._current["count"]))
        self._current = None
92
+
93
+
94
def _format_value(v: Any) -> str:
    """Render one Python value as a GCF scalar token.

    * ``None`` gives ``-``; booleans give ``true`` / ``false``.
    * Integers use ``str()``.
    * Floats use the compact ``g`` presentation when it reproduces the value
      exactly, otherwise the shortest round-tripping ``repr``.
    * The empty string gives ``""``.  Strings containing ``|`` or a newline
      are wrapped in double quotes with ``\\`` and ``"`` backslash-escaped;
      the newline itself is written verbatim.  Other strings are emitted
      as-is.
    * Any other object uses ``str()``.
    """
    if v is None:
        return "-"
    # bool is a subclass of int, so it must be tested first.
    if isinstance(v, bool):
        return "true" if v else "false"
    if isinstance(v, int):
        return str(v)
    if isinstance(v, float):
        # "g" keeps only six significant digits.  That is the compact form
        # wanted for values like 95000.0 -> "95000", but it would silently
        # truncate e.g. 1234567.891, so fall back to repr when lossy.
        s = f"{v:g}"
        if v == v and float(s) != v:  # v == v is False only for NaN
            s = repr(float(v))
        return s
    if isinstance(v, str):
        if v == "":
            return '""'
        if "|" in v or "\n" in v:
            # Escape backslashes before quotes so the decoder's unescaping
            # of \\ and \" restores the original text.
            escaped = v.replace("\\", "\\\\").replace('"', '\\"')
            return '"' + escaped + '"'
        return v
    return str(v)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gcf-python
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
5
5
  Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
6
6
  Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
@@ -30,7 +30,7 @@ Description-Content-Type: text/markdown
30
30
 
31
31
  # gcf-python
32
32
 
33
- Python implementation of [GCF (Graph Compact Format)](https://gcformat.com/) — the most token-efficient wire format for LLMs. A drop-in alternative to JSON and TOON for any structured data.
33
+ Python implementation of [GCF](https://gcformat.com/) — the most token-efficient wire format for LLMs. A drop-in alternative to JSON and TOON for any structured data.
34
34
 
35
35
  **79% fewer input tokens than JSON. 75% fewer output tokens. 52% smaller than TOON. 100% LLM comprehension at 500 symbols, where JSON scores 76.9% and TOON scores 92.3%.**
36
36
 
@@ -66,33 +66,21 @@ Payload: 50 symbols, 20 edges
66
66
  ### Quick Start
67
67
 
68
68
  ```python
69
- from gcf import encode, Payload, Symbol, Edge
69
+ from gcf import encode_generic
70
70
 
71
- p = Payload(
72
- tool="context_for_task",
73
- token_budget=5000,
74
- tokens_used=1847,
75
- symbols=[
76
- Symbol(qualified_name="pkg.AuthMiddleware", kind="function", score=0.78, provenance="lsp_resolved", distance=0),
77
- Symbol(qualified_name="pkg.NewServer", kind="function", score=0.54, provenance="lsp_resolved", distance=1),
78
- ],
79
- edges=[
80
- Edge(source="pkg.NewServer", target="pkg.AuthMiddleware", edge_type="calls"),
71
+ output = encode_generic({
72
+ "employees": [
73
+ {"id": 1, "name": "Alice", "department": "Engineering", "salary": 95000},
74
+ {"id": 2, "name": "Bob", "department": "Sales", "salary": 72000},
81
75
  ],
82
- )
83
-
84
- output = encode(p)
76
+ })
85
77
  ```
86
78
 
87
79
  Output:
88
80
  ```
89
- GCF tool=context_for_task budget=5000 tokens=1847 symbols=2 edges=1
90
- ## targets
91
- @0 fn pkg.AuthMiddleware 0.78 lsp_resolved
92
- ## related
93
- @1 fn pkg.NewServer 0.54 lsp_resolved
94
- ## edges [1]
95
- @0<@1 calls
81
+ ## employees [2]{id,name,department,salary}
82
+ 1|Alice|Engineering|95000
83
+ 2|Bob|Sales|72000
96
84
  ```
97
85
 
98
86
  ## Decode
@@ -1,15 +1,17 @@
1
- gcf/__init__.py,sha256=K4n34s5a0cd0MnYng5UvoTaxT1wIjidv7KgUThELmP4,1596
1
+ gcf/__init__.py,sha256=ihKGNwTM0cYDi3t2o4PJjcRjaRW-rE6LYZKPg_KAldE,1738
2
2
  gcf/cli.py,sha256=2hSTBqiYcn1_EgGXuO65MHiEGh0C4DRMvspTd2zUaso,4258
3
3
  gcf/constants.py,sha256=cmZ8YJSOB0im_eyfN8v4UvrLpBC6Fuf4cfcKZGbutxY,638
4
4
  gcf/decode.py,sha256=48G6XmBilGYl0-c_Xy7MX0iTwOxGW9bn3wC4UyNApc4,5465
5
+ gcf/decode_generic.py,sha256=NWRhOPWHypCDw6BbnDq78J7QA7EmfuBhvRyDKxBLwXg,7292
5
6
  gcf/delta.py,sha256=xU0ujtSq1iF7yU8yk_WNQKh8iove-WUV_nKSuvW1XVk,1656
6
7
  gcf/encode.py,sha256=Oljb1r5b7SHmng1XYvcvuJMpaRfJVJ81VOyocPf_kAs,2915
7
8
  gcf/generic.py,sha256=Nf1Ii0pYS0dgZ4o2ghl-3Qhnms-kCKY2D3q-3-oGpe8,5454
8
9
  gcf/session.py,sha256=4_ARRL06Tg2CI8D2eyi0V5nFphFAFMfOKBXYnAbI6Nk,4690
9
10
  gcf/stream.py,sha256=DBzZrb9t5ldVEsvcRX90SY59Fm0PM4aPt4XBIrJ1Gro,5186
11
+ gcf/stream_generic.py,sha256=udKtvj-YJXl_eG_KzMZ3g9_EBMS3BTcAmAnsczPJ90M,3826
10
12
  gcf/types.py,sha256=AWm-LQoSqLHAYtEjcAxWQZqJ4JXqNreLUKO2mJFgNMA,1465
11
- gcf_python-0.4.0.dist-info/METADATA,sha256=pucirOGE46EL70gW8PjeRe1uY4WyzSihRZrn-zryNqk,9074
12
- gcf_python-0.4.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
13
- gcf_python-0.4.0.dist-info/entry_points.txt,sha256=aFT6gqlkh8iGfM8cblE-LUMxHH08_v71IIoZtDdRIVA,37
14
- gcf_python-0.4.0.dist-info/licenses/LICENSE,sha256=txSvg3E4LugiB7MOOTci6WKd6wMOrOJTvaITeFJ2SgU,1074
15
- gcf_python-0.4.0.dist-info/RECORD,,
13
+ gcf_python-0.5.0.dist-info/METADATA,sha256=M399fPoVeSL4AvQqm7HaR92rkVCilg5MC1jzqWHwSSg,8667
14
+ gcf_python-0.5.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
15
+ gcf_python-0.5.0.dist-info/entry_points.txt,sha256=aFT6gqlkh8iGfM8cblE-LUMxHH08_v71IIoZtDdRIVA,37
16
+ gcf_python-0.5.0.dist-info/licenses/LICENSE,sha256=txSvg3E4LugiB7MOOTci6WKd6wMOrOJTvaITeFJ2SgU,1074
17
+ gcf_python-0.5.0.dist-info/RECORD,,