gcf-python 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. gcf_python-0.1.1/CHANGELOG.md +18 -0
  2. {gcf_python-0.1.0 → gcf_python-0.1.1}/PKG-INFO +28 -3
  3. {gcf_python-0.1.0 → gcf_python-0.1.1}/README.md +27 -2
  4. {gcf_python-0.1.0 → gcf_python-0.1.1}/pyproject.toml +1 -1
  5. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/__init__.py +3 -1
  6. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/decode.py +2 -0
  7. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/encode.py +2 -0
  8. gcf_python-0.1.1/src/gcf/generic.py +150 -0
  9. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/session.py +2 -0
  10. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/types.py +2 -0
  11. gcf_python-0.1.1/tests/test_generic.py +170 -0
  12. {gcf_python-0.1.0 → gcf_python-0.1.1}/.github/workflows/ci.yml +0 -0
  13. {gcf_python-0.1.0 → gcf_python-0.1.1}/.github/workflows/publish.yml +0 -0
  14. {gcf_python-0.1.0 → gcf_python-0.1.1}/.gitignore +0 -0
  15. {gcf_python-0.1.0 → gcf_python-0.1.1}/LICENSE +0 -0
  16. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/cli.py +0 -0
  17. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/constants.py +0 -0
  18. {gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/delta.py +0 -0
  19. {gcf_python-0.1.0 → gcf_python-0.1.1}/tests/__init__.py +0 -0
  20. {gcf_python-0.1.0 → gcf_python-0.1.1}/tests/test_decode.py +0 -0
  21. {gcf_python-0.1.0 → gcf_python-0.1.1}/tests/test_delta.py +0 -0
  22. {gcf_python-0.1.0 → gcf_python-0.1.1}/tests/test_encode.py +0 -0
  23. {gcf_python-0.1.0 → gcf_python-0.1.1}/tests/test_roundtrip.py +0 -0
  24. {gcf_python-0.1.0 → gcf_python-0.1.1}/tests/test_session.py +0 -0
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ ## v0.1.1 (2026-06-03)
4
+
5
+ - `encode_generic`: encode arbitrary Python values into GCF tabular format
6
+ - Tabular encoding: positional rows with pipe separators, section headers, nested field support
7
+ - Uniform array detection: a list of dicts is encoded as a table when the sampled items share at least 70% of their keys with the first item
8
+
9
+ ## v0.1.0 (2026-06-03)
10
+
11
+ - Initial release
12
+ - `encode` / `decode`: full GCF round-trip
13
+ - `encode_with_session`: session deduplication (92.7% savings by 5th call)
14
+ - `encode_delta`: delta encoding for re-queries (81.2% savings)
15
+ - Thread-safe `Session` class
16
+ - 16 kind abbreviations
17
+ - CLI: `gcf encode`, `gcf decode`, `gcf stats`
18
+ - Type hints, Python 3.9+, zero runtime dependencies
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gcf-python
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
5
5
  Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
6
6
  Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
@@ -37,7 +37,7 @@ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwe
37
37
  ## Install
38
38
 
39
39
  ```
40
- pip install gcf-py
40
+ pip install gcf-python
41
41
  ```
42
42
 
43
43
  Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
@@ -139,11 +139,36 @@ output = encode_delta(delta)
139
139
 
140
140
  81.2% savings on re-queries where the pack changed slightly.
141
141
 
142
+ ## Generic Encoding
143
+
144
+ Encode any Python value (not just graph payloads) into GCF tabular format:
145
+
146
+ ```python
147
+ from gcf import encode_generic
148
+
149
+ output = encode_generic({
150
+ "employees": [
151
+ {"id": 1, "name": "Alice", "department": "Engineering", "salary": 95000},
152
+ {"id": 2, "name": "Bob", "department": "Sales", "salary": 72000},
153
+ ],
154
+ })
155
+ ```
156
+
157
+ Output:
158
+ ```
159
+ ## employees [2]{id,name,department,salary}
160
+ 1|Alice|Engineering|95000
161
+ 2|Bob|Sales|72000
162
+ ```
163
+
164
+ Works on dicts, lists, and primitives. Lists of uniform dicts get tabular rows. Nested dicts use `## key` section headers.
165
+
142
166
  ## API
143
167
 
144
168
  | Function | Description |
145
169
  |----------|-------------|
146
- | `encode(p: Payload) -> str` | Encode a payload to GCF text |
170
+ | `encode(p: Payload) -> str` | Encode a graph payload to GCF text |
171
+ | `encode_generic(data: Any) -> str` | Encode any value to GCF tabular format |
147
172
  | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
148
173
  | `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
149
174
  | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
@@ -12,7 +12,7 @@ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwe
12
12
  ## Install
13
13
 
14
14
  ```
15
- pip install gcf-py
15
+ pip install gcf-python
16
16
  ```
17
17
 
18
18
  Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
@@ -114,11 +114,36 @@ output = encode_delta(delta)
114
114
 
115
115
  81.2% savings on re-queries where the pack changed slightly.
116
116
 
117
+ ## Generic Encoding
118
+
119
+ Encode any Python value (not just graph payloads) into GCF tabular format:
120
+
121
+ ```python
122
+ from gcf import encode_generic
123
+
124
+ output = encode_generic({
125
+ "employees": [
126
+ {"id": 1, "name": "Alice", "department": "Engineering", "salary": 95000},
127
+ {"id": 2, "name": "Bob", "department": "Sales", "salary": 72000},
128
+ ],
129
+ })
130
+ ```
131
+
132
+ Output:
133
+ ```
134
+ ## employees [2]{id,name,department,salary}
135
+ 1|Alice|Engineering|95000
136
+ 2|Bob|Sales|72000
137
+ ```
138
+
139
+ Works on dicts, lists, and primitives. Lists of uniform dicts get tabular rows. Nested dicts use `## key` section headers.
140
+
117
141
  ## API
118
142
 
119
143
  | Function | Description |
120
144
  |----------|-------------|
121
- | `encode(p: Payload) -> str` | Encode a payload to GCF text |
145
+ | `encode(p: Payload) -> str` | Encode a graph payload to GCF text |
146
+ | `encode_generic(data: Any) -> str` | Encode any value to GCF tabular format |
122
147
  | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
123
148
  | `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
124
149
  | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "gcf-python"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -38,6 +38,7 @@ from .constants import KIND_ABBREV, KIND_EXPAND
38
38
  from .decode import DecodeError, decode
39
39
  from .delta import encode_delta
40
40
  from .encode import encode
41
+ from .generic import encode_generic
41
42
  from .session import Session, encode_with_session
42
43
  from .types import Components, DeltaPayload, Edge, Payload, Symbol
43
44
 
@@ -54,7 +55,8 @@ __all__ = [
54
55
  "decode",
55
56
  "encode",
56
57
  "encode_delta",
58
+ "encode_generic",
57
59
  "encode_with_session",
58
60
  ]
59
61
 
60
- __version__ = "0.1.0"
62
+ __version__ = "0.1.1"
@@ -1,5 +1,7 @@
1
1
  """GCF decoder: parses GCF text back into a Payload."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  from .constants import KIND_EXPAND
4
6
  from .types import Edge, Payload, Symbol
5
7
 
@@ -1,5 +1,7 @@
1
1
  """GCF encoder: serializes Payload into GCF text format."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  from .constants import KIND_ABBREV
4
6
  from .types import Payload, Symbol
5
7
 
@@ -0,0 +1,150 @@
1
+ """GCF generic encoder: serializes arbitrary Python values into GCF tabular format."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
def encode_generic(data: Any) -> str:
    """Serialize an arbitrary Python value as GCF tabular text.

    This is the counterpart of encode() for data that is not a graph
    Payload: dicts, lists, and primitives are rendered with GCF's tabular
    encoding grammar.

    Args:
        data: Any Python value (dict, list, primitive, or None).

    Returns:
        The encoded lines joined by newlines, with a trailing newline. When
        encoding produces no lines at all, a single newline is returned.
    """
    collected: list[str] = []
    _encode_value(data, collected, depth=0)
    if not collected:
        return "\n"
    return "\n".join(collected) + "\n"
24
+
25
+
26
def _encode_value(value: Any, lines: list[str], depth: int) -> None:
    """Route a value to the encoder that matches its type.

    Dicts are handed to _encode_dict, lists to _encode_array under the
    section name "items", and any other value is appended to ``lines`` as a
    single indented, formatted line.
    """
    if isinstance(value, list):
        _encode_array(value, "items", lines, depth)
        return
    if isinstance(value, dict):
        _encode_dict(value, lines, depth)
        return
    lines.append(_indent(depth) + _format_value(value))
34
+
35
+
36
def _encode_dict(d: dict, lines: list[str], depth: int) -> None:
    """Append the encoding of a dict to ``lines``.

    Primitive entries become ``key=value`` lines at the current depth. A
    nested dict gets a ``## key`` header and is encoded one level deeper; a
    list is delegated to _encode_array at the current depth.
    """
    pad = _indent(depth)
    for name, entry in d.items():
        if isinstance(entry, dict):
            lines.append(f"{pad}## {name}")
            _encode_dict(entry, lines, depth + 1)
            continue
        if isinstance(entry, list):
            _encode_array(entry, name, lines, depth)
            continue
        lines.append(f"{pad}{name}={_format_value(entry)}")
47
+
48
+
49
def _encode_array(items: list, name: str, lines: list[str], depth: int) -> None:
    """Append the encoding of a list to ``lines``.

    A non-empty list accepted by _is_uniform_dict_list is written as a
    table. Every other list (including an empty one) gets a
    ``## name [count]`` header followed by one ``@index`` entry per element:
    dict elements are expanded one level deeper, other elements are
    formatted inline after the index.
    """
    if items and _is_uniform_dict_list(items):
        _encode_tabular(items, name, lines, depth)
        return

    pad = _indent(depth)
    # An empty list falls through here too: the header reads "[0]" and the
    # loop below simply does not run.
    lines.append(f"{pad}## {name} [{len(items)}]")
    for index, element in enumerate(items):
        marker = f"{pad}@{index}"
        if isinstance(element, dict):
            lines.append(marker)
            _encode_dict(element, lines, depth + 1)
        else:
            lines.append(f"{marker} {_format_value(element)}")
67
+
68
+
69
def _encode_tabular(items: list[dict], name: str, lines: list[str], depth: int) -> None:
    """Encode a uniform list of dicts as a tabular section.

    The section starts with a ``## name [count]{field,...}`` header listing
    the primitive columns, followed by one pipe-separated row per item. When
    any column holds nested data, every row is prefixed with ``@index`` and
    its nested values are written beneath it, one level deeper.

    Columns are the union of the keys of all items, in first-seen order, and
    a column counts as nested if any item holds a dict or list under it.
    Deriving both from the first item alone would drop keys that only appear
    in later rows and mis-encode values whose kind differs from row 0.

    Args:
        items: Non-empty list of dicts to encode.
        name: Section name written in the header.
        lines: Output buffer the encoded lines are appended to.
        depth: Indentation depth of the header and the rows.
    """
    prefix = _indent(depth)
    inner_prefix = _indent(depth + 1)

    # dict.fromkeys acts as an ordered set: union of keys, first-seen order.
    all_keys = list(dict.fromkeys(key for item in items for key in item))
    nested_keys = {
        key
        for item in items
        for key, value in item.items()
        if isinstance(value, (dict, list))
    }
    primitive_fields = [k for k in all_keys if k not in nested_keys]
    nested_fields = [k for k in all_keys if k in nested_keys]

    # Header with field names (primitive fields only in the column spec).
    # Keys are stringified so non-string keys do not break the join.
    column_spec = ",".join(str(f) for f in primitive_fields)
    lines.append(f"{prefix}## {name} [{len(items)}]{{{column_spec}}}")

    for i, item in enumerate(items):
        # A missing key formats like None, keeping the row aligned.
        row_str = "|".join(_format_value(item.get(f)) for f in primitive_fields)

        if not nested_fields:
            lines.append(f"{prefix}{row_str}")
            continue

        lines.append(f"{prefix}@{i} {row_str}")
        for nk in nested_fields:
            nv = item.get(nk)
            if isinstance(nv, list):
                _encode_array(nv, nk, lines, depth + 1)
            elif isinstance(nv, dict):
                lines.append(f"{inner_prefix}## {nk}")
                _encode_dict(nv, lines, depth + 2)
            elif nv is not None:
                # Primitive value in a column that is nested elsewhere:
                # keep it as key=value instead of silently dropping it.
                lines.append(f"{inner_prefix}{nk}={_format_value(nv)}")
98
+
99
+
100
+ def _is_uniform_dict_list(items: list) -> bool:
101
+ """Check whether a list contains uniform dicts (same keys across items).
102
+
103
+ Samples up to the first 5 items. Considers the list uniform if key
104
+ overlap is at least 70% between consecutive items and the first item.
105
+ """
106
+ if not items or not isinstance(items[0], dict):
107
+ return False
108
+
109
+ sample = items[:5]
110
+ if not all(isinstance(item, dict) for item in sample):
111
+ return False
112
+
113
+ if not sample:
114
+ return False
115
+
116
+ reference_keys = set(sample[0].keys())
117
+ if not reference_keys:
118
+ return False
119
+
120
+ for item in sample[1:]:
121
+ item_keys = set(item.keys())
122
+ union = reference_keys | item_keys
123
+ intersection = reference_keys & item_keys
124
+ if not union or len(intersection) / len(union) < 0.7:
125
+ return False
126
+
127
+ return True
128
+
129
+
130
+ def _format_value(value: Any) -> str:
131
+ """Format a single value for GCF output.
132
+
133
+ None becomes "-". Booleans are lowercased. Numbers are unquoted.
134
+ Strings containing "|" or newlines are quoted. Everything else is direct.
135
+ """
136
+ if value is None:
137
+ return "-"
138
+ if isinstance(value, bool):
139
+ return "true" if value else "false"
140
+ if isinstance(value, (int, float)):
141
+ return str(value)
142
+ s = str(value)
143
+ if "|" in s or "\n" in s:
144
+ return f'"{s}"'
145
+ return s
146
+
147
+
148
+ def _indent(depth: int) -> str:
149
+ """Return indentation string for the given depth (2 spaces per level)."""
150
+ return " " * depth
@@ -1,5 +1,7 @@
1
1
  """Session-based deduplication for GCF encoding."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import threading
4
6
 
5
7
  from .constants import KIND_ABBREV
@@ -1,5 +1,7 @@
1
1
  """Data types for GCF payloads."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  from dataclasses import dataclass, field
4
6
 
5
7
 
@@ -0,0 +1,170 @@
1
+ """Tests for GCF generic encoding."""
2
+
3
+ from gcf import encode_generic
4
+
5
+
6
def test_encode_flat_tabular_list():
    """A flat list of same-shaped dicts yields a field header and pipe-separated rows."""
    payload = {
        "employees": [
            {"name": "Alice", "role": "engineer", "level": 5},
            {"name": "Bob", "role": "designer", "level": 3},
            {"name": "Carol", "role": "manager", "level": 7},
        ],
    }
    encoded = encode_generic(payload)

    expected_fragments = (
        "## employees [3]{name,role,level}",
        "Alice|engineer|5",
        "Bob|designer|3",
        "Carol|manager|7",
    )
    for fragment in expected_fragments:
        assert fragment in encoded

    # Rows without nested data must not carry an @N prefix.
    for row in encoded.strip().splitlines():
        if "|" not in row:
            continue
        assert not row.strip().startswith("@")
26
+
27
+
28
def test_encode_nested_dict():
    """A nested dict becomes a ## section header followed by indented key=value lines."""
    server = {"host": "localhost", "port": 8080}
    encoded = encode_generic({"server": server, "debug": True})

    for fragment in ("## server", " host=localhost", " port=8080", "debug=true"):
        assert fragment in encoded
43
+
44
+
45
def test_encode_mixed_data():
    """Tabular rows that carry nested fields are prefixed with their @N index."""
    alpha = {
        "name": "Alpha",
        "status": "active",
        "config": {"env": "prod", "region": "us-east"},
    }
    beta = {
        "name": "Beta",
        "status": "draft",
        "config": {"env": "staging", "region": "eu-west"},
    }
    encoded = encode_generic({"projects": [alpha, beta]})

    # Only primitive fields appear in the column spec.
    assert "## projects [2]{name,status}" in encoded
    # Each row is indexed because nested data follows it.
    assert "@0 Alpha|active" in encoded
    assert "@1 Beta|draft" in encoded
    # The nested config dict is emitted as a section with key=value entries.
    for fragment in ("## config", "env=", "region="):
        assert fragment in encoded
72
+
73
+
74
def test_encode_none_value():
    """A None value is written as a dash."""
    encoded = encode_generic({"value": None})
    assert "value=-" in encoded
79
+
80
+
81
def test_encode_none_in_tabular():
    """A None cell inside a tabular row is written as a dash."""
    rows = [
        {"a": 1, "b": None},
        {"a": 2, "b": "hello"},
    ]
    encoded = encode_generic({"items": rows})

    for fragment in ("1|-", "2|hello"):
        assert fragment in encoded
92
+
93
+
94
def test_encode_pipe_separators_in_tabular():
    """Cells of a tabular row are joined with pipe characters."""
    rows = [
        {"x": 10, "y": 20, "z": 30},
        {"x": 40, "y": 50, "z": 60},
    ]
    encoded = encode_generic({"rows": rows})

    for fragment in ("10|20|30", "40|50|60"):
        assert fragment in encoded
105
+
106
+
107
def test_encode_no_repeated_field_names_in_rows():
    """Field names are listed once in the header and never repeated per row."""
    people = [
        {"name": "Alice", "age": 30},
        {"name": "Bob", "age": 25},
    ]
    encoded_lines = encode_generic({"people": people}).strip().splitlines()

    # Exactly one header line, and it names both fields.
    headers = [row for row in encoded_lines if row.strip().startswith("## people")]
    assert len(headers) == 1
    header = headers[0]
    assert "name" in header
    assert "age" in header

    # No non-header line may contain a key= form of either field.
    for row in encoded_lines:
        if row.strip().startswith("##"):
            continue
        assert "name=" not in row
        assert "age=" not in row
129
+
130
+
131
def test_encode_boolean_formatting():
    """Booleans are written in lowercase as true/false."""
    encoded = encode_generic({"enabled": True, "verbose": False})

    for fragment in ("enabled=true", "verbose=false"):
        assert fragment in encoded
137
+
138
+
139
def test_encode_empty_list():
    """An empty list is written as a header whose count is zero."""
    encoded = encode_generic({"items": []})
    assert "## items [0]" in encoded
144
+
145
+
146
def test_encode_non_uniform_list():
    """Dicts with unrelated keys are listed under @N indices, not as a table."""
    mismatched = [
        {"a": 1},
        {"completely": "different", "keys": True},
    ]
    encoded = encode_generic({"things": mismatched})

    for fragment in ("## things [2]", "@0", "@1"):
        assert fragment in encoded
158
+
159
+
160
def test_encode_primitive_value():
    """A bare primitive is written as a single newline-terminated line."""
    cases = ((42, "42\n"), ("hello", "hello\n"))
    for raw, expected in cases:
        assert encode_generic(raw) == expected
164
+
165
+
166
def test_encode_string_with_pipe():
    """A string containing a pipe character is wrapped in double quotes."""
    encoded = encode_generic({"val": "a|b"})
    assert 'val="a|b"' in encoded
File without changes
File without changes
File without changes
File without changes
File without changes