PyPI - gemini-coax - Versions diffs - 0.1.0__tar.gz - Mend

gemini-coax 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

gemini_coax-0.1.0/.github/workflows/ci.yml +25 -0
gemini_coax-0.1.0/.github/workflows/release.yml +57 -0
gemini_coax-0.1.0/.gitignore +28 -0
gemini_coax-0.1.0/LICENSE +21 -0
gemini_coax-0.1.0/PKG-INFO +146 -0
gemini_coax-0.1.0/README.md +111 -0
gemini_coax-0.1.0/pyproject.toml +77 -0
gemini_coax-0.1.0/src/gemini_coax/__init__.py +99 -0
gemini_coax-0.1.0/src/gemini_coax/langchain.py +171 -0
gemini_coax-0.1.0/src/gemini_coax/py.typed +0 -0
gemini_coax-0.1.0/src/gemini_coax/repair.py +321 -0
gemini_coax-0.1.0/src/gemini_coax/schema.py +223 -0
gemini_coax-0.1.0/tests/test_repair.py +57 -0
gemini_coax-0.1.0/tests/test_schema.py +106 -0

gemini_coax-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,25 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install
+        run: pip install -e ".[dev]"
+      - name: Lint
+        run: ruff check src tests
+      - name: Test
+        run: pytest -q

gemini_coax-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,57 @@
+name: Release
+on:
+  push:
+    tags:
+      - "v*"
+jobs:
+  build:
+    name: Build distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Verify tag matches package version
+        run: |
+          TAG="${GITHUB_REF_NAME#v}"
+          PKG=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
+          echo "tag=$TAG  pyproject=$PKG"
+          if [ "$TAG" != "$PKG" ]; then
+            echo "::error::Tag v$TAG does not match pyproject version $PKG"
+            exit 1
+          fi
+      - name: Run tests
+        run: |
+          pip install -e ".[dev]"
+          ruff check src tests
+          pytest -q
+      - name: Build
+        run: |
+          pip install build
+          python -m build
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+  publish:
+    name: Publish to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      id-token: write # OIDC token for Trusted Publishing — no API token needed
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+      - name: Publish
+        uses: pypa/gh-action-pypi-publish@release/v1

gemini_coax-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,28 @@
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+.eggs/
+build/
+dist/
+*.egg
+# Virtual envs
+.venv/
+venv/
+env/
+# Tooling caches
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.coverage
+htmlcov/
+# uv
+uv.lock
+# Editors / OS
+.vscode/
+.idea/
+.DS_Store

gemini_coax-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 mreza0100
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

gemini_coax-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,146 @@
+Metadata-Version: 2.4
+Name: gemini-coax
+Version: 0.1.0
+Summary: Make Google Gemini structured output actually validate against your Pydantic models — fixes the anyOf/enum drop, ignored numeric & length bounds, and degraded array tails.
+Project-URL: Homepage, https://github.com/mreza0100/gemini-coax
+Project-URL: Repository, https://github.com/mreza0100/gemini-coax
+Project-URL: Issues, https://github.com/mreza0100/gemini-coax/issues
+Author: mreza0100
+License: MIT
+License-File: LICENSE
+Keywords: anyof,constrained-decoding,gemini,google-gemini,json-schema,langchain,langchain-google-genai,llm,pydantic,structured-output,validation,vertex-ai
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Requires-Dist: pydantic>=2.0
+Provides-Extra: dev
+Requires-Dist: mypy>=1.10; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Requires-Dist: ruff>=0.6; extra == 'dev'
+Provides-Extra: langchain
+Requires-Dist: langchain-core>=0.3; extra == 'langchain'
+Requires-Dist: langchain-google-genai<5,>=4.2; extra == 'langchain'
+Requires-Dist: tenacity>=9.0; extra == 'langchain'
+Description-Content-Type: text/markdown
+# gemini-coax
+**Make Google Gemini structured output actually validate against your Pydantic models.**
+[![PyPI](https://img.shields.io/pypi/v/gemini-coax.svg)](https://pypi.org/project/gemini-coax/)
+[![Python](https://img.shields.io/pypi/pyversions/gemini-coax.svg)](https://pypi.org/project/gemini-coax/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE)
+Gemini's `response_json_schema` promises structured output, then quietly breaks
+its own promise. It enforces *shape* (types, properties, required) but **silently
+ignores value-level constraints** — so the model hallucinates enum values, blows
+past your numeric bounds, and trails off into half-formed objects at the end of
+long arrays. Pydantic then rejects the *entire* response over one bad field.
+`gemini-coax` coaxes the output back into shape. No retries, no extra LLM calls
+for the common cases — just targeted repair at the validation seam.
+If you've hit any of these, this library is for you:
+- `ValueError: AnyOf is not supported in the response schema for the Gemini API`
+- `Input should be 'a', 'b' or 'c' [type=literal_error]` on a value the schema *defined*
+- A nullable `Literal[...] | None` field where Gemini invents values off-menu
+- `ge`/`le`/`max_length`/`max_items` constraints ignored, failing validation
+- Empty `{}` or truncated objects at the tail of a long list, killing the whole array
+## Install
+```bash
+pip install gemini-coax                  # core — pure, depends only on pydantic
+pip install "gemini-coax[langchain]"     # + the drop-in ChatGoogleGenerativeAI
+```
+## Use it — LangChain (`langchain-google-genai`)
+Swap `ChatGoogleGenerativeAI` for `GeminiSafe`. That's the whole change. Every
+`with_structured_output()` call is now coaxed; no edits in your chains.
+```python
+from typing import Literal
+from pydantic import BaseModel, Field
+from gemini_coax import GeminiSafe          # was: ChatGoogleGenerativeAI
+class Finding(BaseModel):
+    label: Literal["bug", "smell", "nit"] | None   # nullable enum — Gemini drops the enum
+    severity: int = Field(ge=1, le=5)              # bounds Gemini ignores
+class Report(BaseModel):
+    findings: list[Finding]                        # long array → degraded tail
+llm = GeminiSafe(model="gemini-2.5-flash", temperature=0)
+report = llm.with_structured_output(Report).invoke("Review this diff: ...")
+# Validates. The anyOf-enum is stripped before send, out-of-range
+# severities are clamped, and a broken trailing finding is salvaged away.
+```
+It also retries transient transport faults (`ConnectionResetError`, aiohttp
+`ClientOSError`, `ServerDisconnectedError`) that the google-genai SDK leaves
+uncaught — at the single async seam every call funnels through.
+## Use it — raw `google-genai` SDK (no LangChain)
+One call. Hand it the decoded dict and your model:
+```python
+import json
+from gemini_coax import coax
+raw = json.loads(response.text)     # whatever Gemini gave you
+report = coax(raw, Report)          # clamp → fill nullables → validate → repair enums → salvage lists
+```
+Or compose the pieces yourself:
+```python
+from gemini_coax import (
+    strip_nullable_anyof,   # rewrite the schema BEFORE you send it
+    clamp_to_constraints,   # clamp ignored numeric / length / array bounds
+    fill_missing_nullables, # inject None for nullables Gemini omitted
+    repair_enums,           # fuzzy-match close-but-wrong enum values
+    salvage_lists,          # drop broken tail entries, keep the valid ones
+)
+schema = strip_nullable_anyof(Report.model_json_schema())   # send THIS to Gemini
+```
+## What it does
+| Gemini misbehavior | gemini-coax response |
+| --- | --- |
+| Drops `enum` inside `anyOf` (nullable `Literal`) → hallucinated values | `strip_nullable_anyof` rewrites the schema to a plain enum + drops it from `required` before send |
+| Ignores `ge/le/gt/lt`, `max_length`, `max_items` | `clamp_to_constraints` clamps raw values to the model's field metadata |
+| Omits a now-optional nullable field | `fill_missing_nullables` injects `None` so re-validation passes |
+| Close-but-wrong enum at the array tail (`"defensiveness"` vs `"defensiveness-tone"`) | `repair_enums` fuzzy-matches it back (zero-cost `difflib`) |
+| Empty `{}` / truncated objects when the token budget runs out | `salvage_lists` validates entries individually, keeps the good ones |
+| Transient transport fault before any HTTP status | `GeminiSafe` retries with exponential backoff + jitter |
+A full-chain retry is 100–300× more expensive than these repairs — and often
+makes things worse. Repair beats re-roll.
+## Design
+Two layers, so the value isn't hostage to any framework's release notes:
+- **Core** (`gemini_coax.schema`, `gemini_coax.repair`, `coax`) — pure functions
+  over `dict` + Pydantic. Only dependency is `pydantic`. Works with the raw SDK,
+  Vertex AI, or anything that hands you a dict.
+- **Adapter** (`gemini_coax.langchain.GeminiSafe`) — the LangChain drop-in.
+  Pulled in only by the `[langchain]` extra; pins `langchain-google-genai>=4.2,<5`.
+## License
+MIT

gemini_coax-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,111 @@
+# gemini-coax
+**Make Google Gemini structured output actually validate against your Pydantic models.**
+[![PyPI](https://img.shields.io/pypi/v/gemini-coax.svg)](https://pypi.org/project/gemini-coax/)
+[![Python](https://img.shields.io/pypi/pyversions/gemini-coax.svg)](https://pypi.org/project/gemini-coax/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE)
+Gemini's `response_json_schema` promises structured output, then quietly breaks
+its own promise. It enforces *shape* (types, properties, required) but **silently
+ignores value-level constraints** — so the model hallucinates enum values, blows
+past your numeric bounds, and trails off into half-formed objects at the end of
+long arrays. Pydantic then rejects the *entire* response over one bad field.
+`gemini-coax` coaxes the output back into shape. No retries, no extra LLM calls
+for the common cases — just targeted repair at the validation seam.
+If you've hit any of these, this library is for you:
+- `ValueError: AnyOf is not supported in the response schema for the Gemini API`
+- `Input should be 'a', 'b' or 'c' [type=literal_error]` on a value the schema *defined*
+- A nullable `Literal[...] | None` field where Gemini invents values off-menu
+- `ge`/`le`/`max_length`/`max_items` constraints ignored, failing validation
+- Empty `{}` or truncated objects at the tail of a long list, killing the whole array
+## Install
+```bash
+pip install gemini-coax                  # core — pure, depends only on pydantic
+pip install "gemini-coax[langchain]"     # + the drop-in ChatGoogleGenerativeAI
+```
+## Use it — LangChain (`langchain-google-genai`)
+Swap `ChatGoogleGenerativeAI` for `GeminiSafe`. That's the whole change. Every
+`with_structured_output()` call is now coaxed; no edits in your chains.
+```python
+from typing import Literal
+from pydantic import BaseModel, Field
+from gemini_coax import GeminiSafe          # was: ChatGoogleGenerativeAI
+class Finding(BaseModel):
+    label: Literal["bug", "smell", "nit"] | None   # nullable enum — Gemini drops the enum
+    severity: int = Field(ge=1, le=5)              # bounds Gemini ignores
+class Report(BaseModel):
+    findings: list[Finding]                        # long array → degraded tail
+llm = GeminiSafe(model="gemini-2.5-flash", temperature=0)
+report = llm.with_structured_output(Report).invoke("Review this diff: ...")
+# Validates. The anyOf-enum is stripped before send, out-of-range
+# severities are clamped, and a broken trailing finding is salvaged away.
+```
+It also retries transient transport faults (`ConnectionResetError`, aiohttp
+`ClientOSError`, `ServerDisconnectedError`) that the google-genai SDK leaves
+uncaught — at the single async seam every call funnels through.
+## Use it — raw `google-genai` SDK (no LangChain)
+One call. Hand it the decoded dict and your model:
+```python
+import json
+from gemini_coax import coax
+raw = json.loads(response.text)     # whatever Gemini gave you
+report = coax(raw, Report)          # clamp → fill nullables → validate → repair enums → salvage lists
+```
+Or compose the pieces yourself:
+```python
+from gemini_coax import (
+    strip_nullable_anyof,   # rewrite the schema BEFORE you send it
+    clamp_to_constraints,   # clamp ignored numeric / length / array bounds
+    fill_missing_nullables, # inject None for nullables Gemini omitted
+    repair_enums,           # fuzzy-match close-but-wrong enum values
+    salvage_lists,          # drop broken tail entries, keep the valid ones
+)
+schema = strip_nullable_anyof(Report.model_json_schema())   # send THIS to Gemini
+```
+## What it does
+| Gemini misbehavior | gemini-coax response |
+| --- | --- |
+| Drops `enum` inside `anyOf` (nullable `Literal`) → hallucinated values | `strip_nullable_anyof` rewrites the schema to a plain enum + drops it from `required` before send |
+| Ignores `ge/le/gt/lt`, `max_length`, `max_items` | `clamp_to_constraints` clamps raw values to the model's field metadata |
+| Omits a now-optional nullable field | `fill_missing_nullables` injects `None` so re-validation passes |
+| Close-but-wrong enum at the array tail (`"defensiveness"` vs `"defensiveness-tone"`) | `repair_enums` fuzzy-matches it back (zero-cost `difflib`) |
+| Empty `{}` / truncated objects when the token budget runs out | `salvage_lists` validates entries individually, keeps the good ones |
+| Transient transport fault before any HTTP status | `GeminiSafe` retries with exponential backoff + jitter |
+A full-chain retry is 100–300× more expensive than these repairs — and often
+makes things worse. Repair beats re-roll.
+## Design
+Two layers, so the value isn't hostage to any framework's release notes:
+- **Core** (`gemini_coax.schema`, `gemini_coax.repair`, `coax`) — pure functions
+  over `dict` + Pydantic. Only dependency is `pydantic`. Works with the raw SDK,
+  Vertex AI, or anything that hands you a dict.
+- **Adapter** (`gemini_coax.langchain.GeminiSafe`) — the LangChain drop-in.
+  Pulled in only by the `[langchain]` extra; pins `langchain-google-genai>=4.2,<5`.
+## License
+MIT

gemini_coax-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,77 @@
+[project]
+name = "gemini-coax"
+version = "0.1.0"
+description = "Make Google Gemini structured output actually validate against your Pydantic models — fixes the anyOf/enum drop, ignored numeric & length bounds, and degraded array tails."
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "MIT" }
+authors = [{ name = "mreza0100" }]
+keywords = [
+    "gemini",
+    "google-gemini",
+    "structured-output",
+    "pydantic",
+    "json-schema",
+    "langchain",
+    "langchain-google-genai",
+    "vertex-ai",
+    "llm",
+    "anyof",
+    "constrained-decoding",
+    "validation",
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Typing :: Typed",
+]
+dependencies = [
+    "pydantic>=2.0",
+]
+[project.optional-dependencies]
+langchain = [
+    "langchain-google-genai>=4.2,<5",
+    "langchain-core>=0.3",
+    "tenacity>=9.0",
+]
+dev = [
+    "pytest>=8.0",
+    "ruff>=0.6",
+    "mypy>=1.10",
+]
+[project.urls]
+Homepage = "https://github.com/mreza0100/gemini-coax"
+Repository = "https://github.com/mreza0100/gemini-coax"
+Issues = "https://github.com/mreza0100/gemini-coax/issues"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/gemini_coax"]
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM"]
+[tool.mypy]
+python_version = "3.10"
+strict = true
+[tool.pytest.ini_options]
+testpaths = ["tests"]

gemini_coax-0.1.0/src/gemini_coax/__init__.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""gemini-coax — make Gemini structured output actually validate.
+Gemini's ``response_json_schema`` enforces structure but silently ignores
+``anyOf`` enums, numeric/length/array bounds, and degrades at the tail of long
+arrays — so Pydantic rejects otherwise-good output. ``gemini-coax`` coaxes it
+into shape.
+Two layers:
+* **Core** (this module + :mod:`gemini_coax.schema` / :mod:`gemini_coax.repair`)
+  — pure functions over ``dict`` + Pydantic, no provider SDK. Use :func:`coax`
+  with the raw ``google-genai`` SDK.
+* **Adapter** (:mod:`gemini_coax.langchain`, optional ``[langchain]`` extra) —
+  :class:`~gemini_coax.langchain.GeminiSafe`, a drop-in ``ChatGoogleGenerativeAI``.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+from pydantic import BaseModel, ValidationError
+from .repair import repair_enums, salvage_list, salvage_lists
+from .schema import clamp_to_constraints, fill_missing_nullables, strip_nullable_anyof
+if TYPE_CHECKING:
+    # Static binding for type checkers: at runtime ``GeminiSafe`` is supplied
+    # lazily by ``__getattr__`` below (so the core import stays free of
+    # langchain), but mypy/pyright cannot follow a runtime ``__getattr__`` and
+    # would type ``from gemini_coax import GeminiSafe`` as ``Any``. This
+    # re-export (``as GeminiSafe``) gives them the concrete class without
+    # importing langchain at runtime.
+    from .langchain import GeminiSafe as GeminiSafe
+__version__ = "0.1.0"  # same version string as ``[project].version`` in pyproject.toml
+__all__ = [  # NOTE(review): GeminiSafe is not listed; it is served by __getattr__ below — presumably deliberate, confirm
+    "coax",
+    "strip_nullable_anyof",
+    "clamp_to_constraints",
+    "fill_missing_nullables",
+    "repair_enums",
+    "salvage_list",
+    "salvage_lists",
+    "__version__",
+]
+def __getattr__(name: str) -> Any:
+    """Lazily expose the optional LangChain adapter at the top level.
+    ``from gemini_coax import GeminiSafe`` works without forcing the core import
+    to depend on ``langchain`` — the adapter (and its ``langchain`` requirement)
+    is only imported the moment ``GeminiSafe`` is actually accessed. With the
+    ``[langchain]`` extra absent, that access raises the adapter's helpful
+    ImportError; plain ``import gemini_coax`` stays clean either way.
+    """
+    if name == "GeminiSafe":  # the only name this hook resolves
+        from .langchain import GeminiSafe  # function-scope import: runs only on this access
+        return GeminiSafe
+    msg = f"module {__name__!r} has no attribute {name!r}"  # every other name is reported as missing
+    raise AttributeError(msg)
+def coax(raw: dict[str, Any], model: type[BaseModel]) -> BaseModel:
+    """Coax a raw Gemini dict into a validated model instance.
+    Runs the full framework-free pipeline: clamp out-of-range values, fill
+    omitted nullables, then validate. If validation still fails, repair wrong
+    enum values, then salvage broken list tails. Raises the original
+    ``ValidationError`` only if nothing could be recovered.
+    This is the one-call entry point for the raw ``google-genai`` SDK. LangChain
+    users should use :class:`gemini_coax.langchain.GeminiSafe` instead, which
+    applies the same pipeline transparently inside ``with_structured_output``.
+    Args:
+        raw: The decoded JSON dict Gemini returned.
+        model: The Pydantic model you expected.
+    Returns:
+        A validated instance of ``model``.
+    Raises:
+        ValidationError: If the output could not be coaxed into the schema.
+    """
+    clamped = clamp_to_constraints(raw, model)  # step 1: always runs, before any validation
+    clamped = fill_missing_nullables(clamped, model)  # step 2: operates on the clamped result
+    try:
+        return model.model_validate(clamped)  # success path: no repair needed
+    except ValidationError as error:
+        repaired = repair_enums(error, clamped, model)  # fallback 1; None is treated as "not recovered"
+        if repaired is not None:
+            return repaired
+        salvaged = salvage_lists(clamped, model)  # fallback 2: reached only when fallback 1 gave None
+        if salvaged is not None:
+            return salvaged
+        raise  # bare raise: re-raises the ValidationError caught above