PyPI - gcf-python - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

gcf-python 0.1.0 (tar.gz) → 0.1.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

gcf_python-0.1.1/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,18 @@
+# Changelog
+## v0.1.1 (2026-06-03)
+- `encode_generic`: encode arbitrary Python values into GCF tabular format
+- Tabular encoding: positional rows with pipe separators, section headers, nested field support
+- Uniform array detection with 70% key overlap threshold
+## v0.1.0 (2026-06-03)
+- Initial release
+- `encode` / `decode`: full GCF round-trip
+- `encode_with_session`: session deduplication (92.7% savings by 5th call)
+- `encode_delta`: delta encoding for re-queries (81.2% savings)
+- Thread-safe `Session` class
+- 16 kind abbreviations
+- CLI: `gcf encode`, `gcf decode`, `gcf stats`
+- Type hints, Python 3.9+, zero runtime dependencies

{gcf_python-0.1.0 → gcf_python-0.1.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gcf-python
-Version: 0.1.0
+Version: 0.1.1
 Summary: Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses
 Project-URL: Homepage, https://github.com/blackwell-systems/gcf-python
 Project-URL: Documentation, https://blackwell-systems.github.io/gcf/
@@ -37,7 +37,7 @@ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwe
 ## Install
 ```
-pip install gcf-py
+pip install gcf-python
 ```
 Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
@@ -139,11 +139,36 @@ output = encode_delta(delta)
 81.2% savings on re-queries where the pack changed slightly.
+## Generic Encoding
+Encode any Python value (not just graph payloads) into GCF tabular format:
+```python
+from gcf import encode_generic
+output = encode_generic({
+    "employees": [
+        {"id": 1, "name": "Alice", "department": "Engineering", "salary": 95000},
+        {"id": 2, "name": "Bob", "department": "Sales", "salary": 72000},
+    ],
+})
+```
+Output:
+```
+## employees [2]{id,name,department,salary}
+1|Alice|Engineering|95000
+2|Bob|Sales|72000
+```
+Works on dicts, lists, and primitives. Lists of uniform dicts get tabular rows. Nested dicts use `## key` section headers.
 ## API
 | Function | Description |
 |----------|-------------|
-| `encode(p: Payload) -> str` | Encode a payload to GCF text |
+| `encode(p: Payload) -> str` | Encode a graph payload to GCF text |
+| `encode_generic(data: Any) -> str` | Encode any value to GCF tabular format |
 | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
 | `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
 | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |

{gcf_python-0.1.0 → gcf_python-0.1.1}/README.md RENAMED Viewed

@@ -12,7 +12,7 @@ Python implementation of [GCF (Graph Compact Format)](https://github.com/blackwe
 ## Install
 ```
-pip install gcf-py
+pip install gcf-python
 ```
 Zero dependencies. Pure Python. Python 3.9+. Includes CLI.
@@ -114,11 +114,36 @@ output = encode_delta(delta)
 81.2% savings on re-queries where the pack changed slightly.
+## Generic Encoding
+Encode any Python value (not just graph payloads) into GCF tabular format:
+```python
+from gcf import encode_generic
+output = encode_generic({
+    "employees": [
+        {"id": 1, "name": "Alice", "department": "Engineering", "salary": 95000},
+        {"id": 2, "name": "Bob", "department": "Sales", "salary": 72000},
+    ],
+})
+```
+Output:
+```
+## employees [2]{id,name,department,salary}
+1|Alice|Engineering|95000
+2|Bob|Sales|72000
+```
+Works on dicts, lists, and primitives. Lists of uniform dicts get tabular rows. Nested dicts use `## key` section headers.
 ## API
 | Function | Description |
 |----------|-------------|
-| `encode(p: Payload) -> str` | Encode a payload to GCF text |
+| `encode(p: Payload) -> str` | Encode a graph payload to GCF text |
+| `encode_generic(data: Any) -> str` | Encode any value to GCF tabular format |
 | `decode(input_text: str) -> Payload` | Parse GCF text back to a Payload |
 | `encode_with_session(p: Payload, s: Session) -> str` | Encode with session deduplication |
 | `encode_delta(d: DeltaPayload) -> str` | Encode a delta (added/removed only) |

{gcf_python-0.1.0 → gcf_python-0.1.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "gcf-python"
-version = "0.1.0"
+version = "0.1.1"
 description = "Python implementation of GCF (Graph Compact Format): token-optimized wire format for LLM tool responses"
 readme = "README.md"
 license = {text = "MIT"}

{gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/__init__.py RENAMED Viewed

@@ -38,6 +38,7 @@ from .constants import KIND_ABBREV, KIND_EXPAND
 from .decode import DecodeError, decode
 from .delta import encode_delta
 from .encode import encode
+from .generic import encode_generic
 from .session import Session, encode_with_session
 from .types import Components, DeltaPayload, Edge, Payload, Symbol
@@ -54,7 +55,8 @@ __all__ = [
     "decode",
     "encode",
     "encode_delta",
+    "encode_generic",
     "encode_with_session",
 ]
-__version__ = "0.1.0"
+__version__ = "0.1.1"

{gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/decode.py RENAMED Viewed

@@ -1,5 +1,7 @@
 """GCF decoder: parses GCF text back into a Payload."""
+from __future__ import annotations
 from .constants import KIND_EXPAND
 from .types import Edge, Payload, Symbol

{gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/encode.py RENAMED Viewed

@@ -1,5 +1,7 @@
 """GCF encoder: serializes Payload into GCF text format."""
+from __future__ import annotations
 from .constants import KIND_ABBREV
 from .types import Payload, Symbol

gcf_python-0.1.1/src/gcf/generic.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""GCF generic encoder: serializes arbitrary Python values into GCF tabular format."""
+from __future__ import annotations
+from typing import Any
def encode_generic(data: Any) -> str:
    """Serialize an arbitrary Python value as GCF tabular text.

    This is the general-purpose counterpart of ``encode()``: rather than a
    graph Payload it accepts plain dicts, lists and scalars and delegates the
    line-by-line rendering to ``_encode_value``.

    Args:
        data: The value to encode (dict, list, scalar, or None).

    Returns:
        The rendered lines joined with newlines and terminated by a single
        trailing newline. When nothing was rendered (for example an empty
        dict) the result is just ``"\\n"``.
    """
    rendered: list[str] = []
    _encode_value(data, rendered, depth=0)
    if not rendered:
        return "\n"
    return "\n".join(rendered) + "\n"
def _encode_value(value: Any, lines: list[str], depth: int) -> None:
    """Route *value* to the encoder that matches its type.

    Dicts are handed to ``_encode_dict``. Lists are handed to
    ``_encode_array`` under the fixed section name ``"items"``. Anything else
    is appended to *lines* as one indented, formatted scalar line.
    """
    if isinstance(value, dict):
        _encode_dict(value, lines, depth)
        return
    if isinstance(value, list):
        # A bare list has no key of its own, so it is emitted as "items".
        _encode_array(value, "items", lines, depth)
        return
    lines.append(_indent(depth) + _format_value(value))
def _encode_dict(d: dict, lines: list[str], depth: int) -> None:
    """Append the entries of *d* to *lines* at nesting level *depth*.

    Scalars become ``key=value`` lines. A dict value opens a ``## key``
    section and is encoded one level deeper. A list value is delegated to
    ``_encode_array`` at the current level, using the key as section name.
    """
    pad = _indent(depth)
    for name, entry in d.items():
        if isinstance(entry, dict):
            lines.append(f"{pad}## {name}")
            _encode_dict(entry, lines, depth + 1)
        elif isinstance(entry, list):
            _encode_array(entry, name, lines, depth)
        else:
            lines.append(f"{pad}{name}={_format_value(entry)}")
def _encode_array(items: list, name: str, lines: list[str], depth: int) -> None:
    """Append a list section called *name* to *lines*.

    A non-empty list accepted by ``_is_uniform_dict_list`` is rendered as a
    table by ``_encode_tabular``. Every other list (including the empty one)
    gets a bare ``## name [count]`` header followed by one ``@index`` entry
    per element: dict elements are expanded one level deeper, all other
    elements are written inline after the index.
    """
    pad = _indent(depth)
    if items and _is_uniform_dict_list(items):
        _encode_tabular(items, name, lines, depth)
        return
    # Empty and non-uniform lists share the same header shape; for an empty
    # list the loop below simply does not run.
    lines.append(f"{pad}## {name} [{len(items)}]")
    for index, element in enumerate(items):
        if isinstance(element, dict):
            lines.append(f"{pad}@{index}")
            _encode_dict(element, lines, depth + 1)
        else:
            # NOTE(review): a list nested directly inside this list also
            # lands here and is rendered by _format_value like a scalar;
            # confirm that is the intended encoding.
            lines.append(f"{pad}@{index} {_format_value(element)}")
def _encode_tabular(items: list[dict], name: str, lines: list[str], depth: int) -> None:
    """Encode a list of dicts as a tabular section appended to *lines*.

    The section starts with a ``## name [count]{col1,col2,...}`` header whose
    columns are the scalar-valued keys. Each item then becomes one
    pipe-separated row. When any key holds a dict or list, every row is
    prefixed with ``@index`` and that key's value is emitted beneath the row,
    one indentation level deeper.

    Columns are collected from *all* items in first-seen order, so a key that
    is missing from the first item is still encoded; a row lacking a column
    renders it as ``-``. A key counts as nested if any item holds a dict or
    list under it; a scalar found under such a key is written as a
    ``key=value`` line below the row instead of being dropped.

    Args:
        items: List whose elements are all dicts. ``_encode_array`` gates
            this call with ``_is_uniform_dict_list``.
        name: Section name used in the header.
        lines: Output buffer; mutated in place.
        depth: Nesting level used for indentation.
    """
    prefix = _indent(depth)
    inner_prefix = _indent(depth + 1)

    # Classify every key across all rows. Looking only at the first row would
    # lose keys that appear later and misjudge keys whose value kind varies.
    # Dict insertion order keeps the first row's key order for uniform data.
    nested_by_key: dict[Any, bool] = {}
    for item in items:
        for key, value in item.items():
            is_nested = isinstance(value, (dict, list))
            nested_by_key[key] = nested_by_key.get(key, False) or is_nested
    primitive_fields = [k for k, nested in nested_by_key.items() if not nested]
    nested_fields = [k for k, nested in nested_by_key.items() if nested]

    # Header lists scalar columns only. str() keeps non-string keys from
    # crashing the join, matching how _encode_dict interpolates keys.
    columns = ",".join(str(key) for key in primitive_fields)
    lines.append(f"{prefix}## {name} [{len(items)}]{{{columns}}}")

    for i, item in enumerate(items):
        row_str = "|".join(_format_value(item.get(f)) for f in primitive_fields)
        if not nested_fields:
            lines.append(f"{prefix}{row_str}")
            continue
        # Rows that own nested content need an @index anchor so the lines
        # that follow can be attributed to them.
        lines.append(f"{prefix}@{i} {row_str}")
        for nk in nested_fields:
            nv = item.get(nk)
            if isinstance(nv, list):
                _encode_array(nv, nk, lines, depth + 1)
            elif isinstance(nv, dict):
                lines.append(f"{inner_prefix}## {nk}")
                _encode_dict(nv, lines, depth + 2)
            elif nv is not None:
                # Scalar under a key that is nested in other rows: keep it.
                lines.append(f"{inner_prefix}{nk}={_format_value(nv)}")
+def _is_uniform_dict_list(items: list) -> bool:
+    """Check whether a list contains uniform dicts (same keys across items).
+    Samples up to the first 5 items. Considers the list uniform if key
+    overlap is at least 70% between consecutive items and the first item.
+    """
+    if not items or not isinstance(items[0], dict):
+        return False
+    sample = items[:5]
+    if not all(isinstance(item, dict) for item in sample):
+        return False
+    if not sample:
+        return False
+    reference_keys = set(sample[0].keys())
+    if not reference_keys:
+        return False
+    for item in sample[1:]:
+        item_keys = set(item.keys())
+        union = reference_keys | item_keys
+        intersection = reference_keys & item_keys
+        if not union or len(intersection) / len(union) < 0.7:
+            return False
+    return True
+def _format_value(value: Any) -> str:
+    """Format a single value for GCF output.
+    None becomes "-". Booleans are lowercased. Numbers are unquoted.
+    Strings containing "|" or newlines are quoted. Everything else is direct.
+    """
+    if value is None:
+        return "-"
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, (int, float)):
+        return str(value)
+    s = str(value)
+    if "|" in s or "\n" in s:
+        return f'"{s}"'
+    return s
+def _indent(depth: int) -> str:
+    """Return indentation string for the given depth (2 spaces per level)."""
+    return "  " * depth

{gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/session.py RENAMED Viewed

@@ -1,5 +1,7 @@
 """Session-based deduplication for GCF encoding."""
+from __future__ import annotations
 import threading
 from .constants import KIND_ABBREV

{gcf_python-0.1.0 → gcf_python-0.1.1}/src/gcf/types.py RENAMED Viewed

@@ -1,5 +1,7 @@
 """Data types for GCF payloads."""
+from __future__ import annotations
 from dataclasses import dataclass, field

gcf_python-0.1.1/tests/test_generic.py ADDED Viewed

@@ -0,0 +1,170 @@
+"""Tests for GCF generic encoding."""
+from gcf import encode_generic
def test_encode_flat_tabular_list():
    """A list of flat, same-shaped dicts yields one header plus bare pipe-separated rows."""
    staff = [
        {"name": "Alice", "role": "engineer", "level": 5},
        {"name": "Bob", "role": "designer", "level": 3},
        {"name": "Carol", "role": "manager", "level": 7},
    ]
    output = encode_generic({"employees": staff})
    for expected in (
        "## employees [3]{name,role,level}",
        "Alice|engineer|5",
        "Bob|designer|3",
        "Carol|manager|7",
    ):
        assert expected in output
    # Rows without nested fields must not carry an @index marker.
    piped_rows = [row for row in output.strip().splitlines() if "|" in row]
    assert not any(row.strip().startswith("@") for row in piped_rows)
def test_encode_nested_dict():
    """A dict value opens a ``## key`` section whose entries are indented key=value lines."""
    config = {"server": {"host": "localhost", "port": 8080}, "debug": True}
    output = encode_generic(config)
    expected_fragments = ["## server", "  host=localhost", "  port=8080", "debug=true"]
    missing = [fragment for fragment in expected_fragments if fragment not in output]
    assert not missing
def test_encode_mixed_data():
    """Rows that carry a nested dict get an @index prefix and list only scalar columns."""

    def project(name, status, env, region):
        # Key order matters: the scalar columns must come out as name,status.
        return {"name": name, "status": status, "config": {"env": env, "region": region}}

    payload = {
        "projects": [
            project("Alpha", "active", "prod", "us-east"),
            project("Beta", "draft", "staging", "eu-west"),
        ],
    }
    output = encode_generic(payload)
    # The header names scalar fields only.
    assert "## projects [2]{name,status}" in output
    # Each row is anchored by its index because nested data follows it.
    for row in ("@0 Alpha|active", "@1 Beta|draft"):
        assert row in output
    # The nested config is emitted as its own section with key=value entries.
    for fragment in ("## config", "env=", "region="):
        assert fragment in output
def test_encode_none_value():
    """A None dict value is written as a dash."""
    assert "value=-" in encode_generic({"value": None})
def test_encode_none_in_tabular():
    """A None cell inside a tabular row is written as a dash."""
    rows = [{"a": 1, "b": None}, {"a": 2, "b": "hello"}]
    output = encode_generic({"items": rows})
    for expected_row in ("1|-", "2|hello"):
        assert expected_row in output
def test_encode_pipe_separators_in_tabular():
    """Cells of a tabular row are joined with pipe characters."""
    points = [{"x": 10, "y": 20, "z": 30}, {"x": 40, "y": 50, "z": 60}]
    output = encode_generic({"rows": points})
    for expected_row in ("10|20|30", "40|50|60"):
        assert expected_row in output
def test_encode_no_repeated_field_names_in_rows():
    """Field names live in the section header only; data rows never repeat them."""
    people = [{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}]
    rendered = encode_generic({"people": people}).strip().splitlines()
    # Exactly one header line, and it names both fields.
    headers = [row for row in rendered if row.strip().startswith("## people")]
    assert len(headers) == 1
    (header,) = headers
    assert "name" in header
    assert "age" in header
    # No non-header line may fall back to key=value notation for these fields.
    body = [row for row in rendered if not row.strip().startswith("##")]
    assert all("name=" not in row and "age=" not in row for row in body)
def test_encode_boolean_formatting():
    """True and False are written in lowercase as true/false."""
    output = encode_generic({"enabled": True, "verbose": False})
    for expected in ("enabled=true", "verbose=false"):
        assert expected in output
def test_encode_empty_list():
    """An empty list is written as a header with a zero count."""
    assert "## items [0]" in encode_generic({"items": []})
def test_encode_non_uniform_list():
    """Dicts with unrelated keys are listed under @N indices with a plain count header."""
    mismatched = [{"a": 1}, {"completely": "different", "keys": True}]
    output = encode_generic({"things": mismatched})
    for fragment in ("## things [2]", "@0", "@1"):
        assert fragment in output
def test_encode_primitive_value():
    """A top-level scalar is written as a single newline-terminated line."""
    for raw, expected in ((42, "42\n"), ("hello", "hello\n")):
        assert encode_generic(raw) == expected
def test_encode_string_with_pipe():
    """A string that contains a pipe is wrapped in double quotes."""
    assert 'val="a|b"' in encode_generic({"val": "a|b"})