PyPI - phylax - Versions diffs - 1.0.0__tar.gz - Mend

phylax 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

phylax-1.0.0/LICENSE +21 -0
phylax-1.0.0/PKG-INFO +196 -0
phylax-1.0.0/README.md +156 -0
phylax-1.0.0/phylax/__init__.py +44 -0
phylax-1.0.0/phylax/_internal/__init__.py +6 -0
phylax-1.0.0/phylax/_internal/adapters/__init__.py +1 -0
phylax-1.0.0/phylax/_internal/adapters/gemini.py +154 -0
phylax-1.0.0/phylax/_internal/adapters/llama.py +157 -0
phylax-1.0.0/phylax/_internal/adapters/openai.py +149 -0
phylax-1.0.0/phylax/_internal/capture.py +276 -0
phylax-1.0.0/phylax/_internal/context.py +111 -0
phylax-1.0.0/phylax/_internal/decorator.py +252 -0
phylax-1.0.0/phylax/_internal/expectations/__init__.py +1 -0
phylax-1.0.0/phylax/_internal/expectations/evaluator.py +134 -0
phylax-1.0.0/phylax/_internal/expectations/rules.py +216 -0
phylax-1.0.0/phylax/_internal/graph.py +831 -0
phylax-1.0.0/phylax/_internal/schema.py +148 -0
phylax-1.0.0/phylax/cli/__init__.py +1 -0
phylax-1.0.0/phylax/cli/main.py +515 -0
phylax-1.0.0/phylax/server/__init__.py +1 -0
phylax-1.0.0/phylax/server/main.py +76 -0
phylax-1.0.0/phylax/server/routes/__init__.py +1 -0
phylax-1.0.0/phylax/server/routes/chat.py +172 -0
phylax-1.0.0/phylax/server/routes/replay.py +247 -0
phylax-1.0.0/phylax/server/routes/traces.py +300 -0
phylax-1.0.0/phylax/server/storage/__init__.py +1 -0
phylax-1.0.0/phylax/server/storage/files.py +392 -0
phylax-1.0.0/phylax/server/storage/sqlite.py +206 -0
phylax-1.0.0/phylax.egg-info/PKG-INFO +196 -0
phylax-1.0.0/phylax.egg-info/SOURCES.txt +38 -0
phylax-1.0.0/phylax.egg-info/dependency_links.txt +1 -0
phylax-1.0.0/phylax.egg-info/entry_points.txt +2 -0
phylax-1.0.0/phylax.egg-info/requires.txt +21 -0
phylax-1.0.0/phylax.egg-info/top_level.txt +1 -0
phylax-1.0.0/pyproject.toml +62 -0
phylax-1.0.0/setup.cfg +4 -0
phylax-1.0.0/tests/test_context.py +121 -0
phylax-1.0.0/tests/test_contract.py +269 -0
phylax-1.0.0/tests/test_expectations.py +196 -0
phylax-1.0.0/tests/test_schema.py +211 -0

phylax-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Mohit Manglani
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

phylax-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,196 @@
+Metadata-Version: 2.4
+Name: phylax
+Version: 1.0.0
+Summary: Deterministic regression enforcement for LLM systems.
+Author: Phylax Team
+License: MIT
+Project-URL: Homepage, https://github.com/xXMohitXx/Phylax
+Project-URL: Documentation, https://github.com/xXMohitXx/Phylax#readme
+Project-URL: Repository, https://github.com/xXMohitXx/Phylax
+Keywords: llm,testing,regression,ci,deterministic,tracing
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Testing
+Classifier: Topic :: Software Development :: Quality Assurance
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pydantic>=2.5.0
+Requires-Dist: pyyaml>=6.0
+Provides-Extra: server
+Requires-Dist: fastapi>=0.109.0; extra == "server"
+Requires-Dist: uvicorn[standard]>=0.27.0; extra == "server"
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == "openai"
+Provides-Extra: google
+Requires-Dist: google-generativeai>=0.8.0; extra == "google"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Requires-Dist: httpx>=0.26.0; extra == "dev"
+Requires-Dist: requests>=2.31.0; extra == "dev"
+Provides-Extra: all
+Requires-Dist: phylax[google,openai,server]; extra == "all"
+Dynamic: license-file
+<p align="center">
+  <img src="https://raw.githubusercontent.com/xXMohitXx/Phylax/main/assets/logo/phylax_logo.png" alt="Phylax Logo" width="200">
+</p>
+# Phylax
+**Deterministic regression enforcement for LLM systems.**
+[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![PyPI version](https://img.shields.io/pypi/v/phylax.svg)](https://pypi.org/project/phylax/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+---
+## The Problem
+LLM outputs change unexpectedly. Same prompt, different model version → different behavior.
+Without Phylax, you discover this **in production**.
+## Installation
+```bash
+pip install phylax
+```
+For server/UI support:
+```bash
+pip install phylax[server]
+```
+For all LLM providers:
+```bash
+pip install phylax[all]
+```
+## Quick Start
+```python
+from phylax import trace, expect, execution
+@trace(provider="gemini")
+@expect(must_include=["refund"], max_latency_ms=1500)
+def customer_reply(query):
+    return llm.generate(query)
+# Track multi-step agent flows
+with execution("customer-support-flow"):
+    result = customer_reply("I want a refund")
+```
+```bash
+# Mark a known-good response as baseline
+phylax bless <trace_id>
+# In CI: fail if output regresses
+phylax check  # exits 1 on failure
+```
+That's it. Your CI now blocks LLM regressions.
+---
+## What Phylax is NOT
+- ❌ **Not monitoring** — no metrics, no dashboards
+- ❌ **Not observability** — no traces-to-cloud, no analytics
+- ❌ **Not AI judgment** — rules are deterministic, not LLM-based
+- ❌ **Not cloud-dependent** — runs entirely locally
+- ❌ **Not prompt engineering** — tests outputs, not prompts
+Phylax is a **test framework**. It tells you when LLM behavior changes.
+---
+## CI Integration
+```yaml
+# .github/workflows/phylax.yml
+- run: phylax check
+  env:
+    GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+```
+**Exit codes:**
+- `0` — All golden traces pass
+- `1` — Regression detected
+---
+## Expectations (Deterministic Rules)
+```python
+@expect(
+    must_include=["word"],       # Required content
+    must_not_include=["sorry"],  # Forbidden content
+    max_latency_ms=2000,         # Performance gate
+    min_tokens=10                # Minimum length
+)
+```
+All rules are deterministic. No AI judgment. No ambiguity.
+---
+## Commands
+| Command | What it does |
+|---------|--------------|
+| `phylax init` | Initialize config |
+| `phylax server` | Start API server |
+| `phylax list` | List traces |
+| `phylax list --failed` | Show only failed traces |
+| `phylax show <id>` | Show trace details |
+| `phylax replay <id>` | Re-run a trace |
+| `phylax bless <id>` | Mark as golden baseline |
+| `phylax check` | CI regression check |
+---
+## Features
+| Feature | Description |
+|---------|-------------|
+| **Trace Capture** | Record every LLM call automatically |
+| **Expectations** | Define PASS/FAIL rules (4 deterministic rules) |
+| **Golden Traces** | Baseline comparisons with hash verification |
+| **CI Integration** | `phylax check` exits 1 on regression |
+| **Execution Graphs** | Visualize multi-step agent workflows |
+| **Forensics Mode** | Debug failures with guided investigation |
+---
+## Stability Guarantee
+Phylax v1.0.0 is **API-frozen**:
+- No breaking changes in v1.x
+- `trace`, `expect`, `execution` are stable
+- Exit codes are stable
+- Schema is stable
+See [docs/contract.md](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md) for full guarantees.
+---
+## Documentation
+- [Quickstart](https://github.com/xXMohitXx/Phylax/blob/main/docs/quickstart.md)
+- [Mental Model](https://github.com/xXMohitXx/Phylax/blob/main/docs/mental-model.md)
+- [API Contract](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md)
+---
+## License
+MIT License

phylax-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,156 @@
+<p align="center">
+  <img src="https://raw.githubusercontent.com/xXMohitXx/Phylax/main/assets/logo/phylax_logo.png" alt="Phylax Logo" width="200">
+</p>
+# Phylax
+**Deterministic regression enforcement for LLM systems.**
+[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![PyPI version](https://img.shields.io/pypi/v/phylax.svg)](https://pypi.org/project/phylax/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+---
+## The Problem
+LLM outputs change unexpectedly. Same prompt, different model version → different behavior.
+Without Phylax, you discover this **in production**.
+## Installation
+```bash
+pip install phylax
+```
+For server/UI support:
+```bash
+pip install phylax[server]
+```
+For all LLM providers:
+```bash
+pip install phylax[all]
+```
+## Quick Start
+```python
+from phylax import trace, expect, execution
+@trace(provider="gemini")
+@expect(must_include=["refund"], max_latency_ms=1500)
+def customer_reply(query):
+    return llm.generate(query)
+# Track multi-step agent flows
+with execution("customer-support-flow"):
+    result = customer_reply("I want a refund")
+```
+```bash
+# Mark a known-good response as baseline
+phylax bless <trace_id>
+# In CI: fail if output regresses
+phylax check  # exits 1 on failure
+```
+That's it. Your CI now blocks LLM regressions.
+---
+## What Phylax is NOT
+- ❌ **Not monitoring** — no metrics, no dashboards
+- ❌ **Not observability** — no traces-to-cloud, no analytics
+- ❌ **Not AI judgment** — rules are deterministic, not LLM-based
+- ❌ **Not cloud-dependent** — runs entirely locally
+- ❌ **Not prompt engineering** — tests outputs, not prompts
+Phylax is a **test framework**. It tells you when LLM behavior changes.
+---
+## CI Integration
+```yaml
+# .github/workflows/phylax.yml
+- run: phylax check
+  env:
+    GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+```
+**Exit codes:**
+- `0` — All golden traces pass
+- `1` — Regression detected
+---
+## Expectations (Deterministic Rules)
+```python
+@expect(
+    must_include=["word"],       # Required content
+    must_not_include=["sorry"],  # Forbidden content
+    max_latency_ms=2000,         # Performance gate
+    min_tokens=10                # Minimum length
+)
+```
+All rules are deterministic. No AI judgment. No ambiguity.
+---
+## Commands
+| Command | What it does |
+|---------|--------------|
+| `phylax init` | Initialize config |
+| `phylax server` | Start API server |
+| `phylax list` | List traces |
+| `phylax list --failed` | Show only failed traces |
+| `phylax show <id>` | Show trace details |
+| `phylax replay <id>` | Re-run a trace |
+| `phylax bless <id>` | Mark as golden baseline |
+| `phylax check` | CI regression check |
+---
+## Features
+| Feature | Description |
+|---------|-------------|
+| **Trace Capture** | Record every LLM call automatically |
+| **Expectations** | Define PASS/FAIL rules (4 deterministic rules) |
+| **Golden Traces** | Baseline comparisons with hash verification |
+| **CI Integration** | `phylax check` exits 1 on regression |
+| **Execution Graphs** | Visualize multi-step agent workflows |
+| **Forensics Mode** | Debug failures with guided investigation |
+---
+## Stability Guarantee
+Phylax v1.0.0 is **API-frozen**:
+- No breaking changes in v1.x
+- `trace`, `expect`, `execution` are stable
+- Exit codes are stable
+- Schema is stable
+See [docs/contract.md](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md) for full guarantees.
+---
+## Documentation
+- [Quickstart](https://github.com/xXMohitXx/Phylax/blob/main/docs/quickstart.md)
+- [Mental Model](https://github.com/xXMohitXx/Phylax/blob/main/docs/mental-model.md)
+- [API Contract](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md)
+---
+## License
+MIT License

phylax-1.0.0/phylax/__init__.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""
+Phylax - Deterministic regression enforcement for LLM systems.
+Public API:
+    trace       - Decorator to trace LLM calls
+    expect      - Decorator to add expectations
+    execution   - Context manager for grouping traces
+    Trace       - Trace data model
+    Verdict     - Verdict enum (PASS, FAIL, TAINTED)
+"""
+from phylax._internal.schema import (
+    Trace,
+    TraceRequest,
+    TraceResponse,
+    TraceRuntime,
+    Verdict,
+)
+from phylax._internal.decorator import trace, expect
+from phylax._internal.context import execution
+from phylax._internal.graph import ExecutionGraph, NodeRole, GraphStage, GraphDiff, NodeDiff
+# Package version string, exposed as ``phylax.__version__``.
+__version__ = "1.0.0"
+# Names exported by ``from phylax import *``.
+__all__ = [
+    # Core decorators
+    "trace",
+    "expect",
+    # Context manager
+    "execution",
+    # Data models
+    "Trace",
+    "TraceRequest",
+    "TraceResponse",
+    "TraceRuntime",
+    "Verdict",
+    # Graph (advanced)
+    "ExecutionGraph",
+    "NodeRole",
+    "GraphStage",
+    "GraphDiff",
+    "NodeDiff",
+    # Version
+    "__version__",
+]

phylax-1.0.0/phylax/_internal/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+Phylax internal modules.
+These are implementation details and should not be imported directly.
+Use the public API from `phylax` instead.
+"""

phylax-1.0.0/phylax/_internal/adapters/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Phylax internal adapters for LLM providers."""

phylax-1.0.0/phylax/_internal/adapters/gemini.py ADDED Viewed

@@ -0,0 +1,154 @@
+"""
+Gemini Adapter
+Provides integration with Google's Gemini API.
+"""
+from typing import Any, Optional
+from phylax._internal.capture import CaptureLayer, get_capture_layer
+from phylax._internal.schema import Trace
+class GeminiAdapter:
+    """
+    Adapter for Google Gemini API.
+    Usage:
+        adapter = GeminiAdapter(api_key="your-key")
+        response = adapter.chat_completion(
+            model="gemini-2.5-flash",
+            messages=[{"role": "user", "content": "Hello!"}]
+        )
+    """
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        capture_layer: Optional[CaptureLayer] = None,
+    ):
+        """
+        Initialize the Gemini adapter.
+        Args:
+            api_key: Optional API key (uses GOOGLE_API_KEY env var if not provided)
+            capture_layer: Optional custom capture layer
+        """
+        self.api_key = api_key
+        self.capture_layer = capture_layer or get_capture_layer()
+        self._client = None
+    def _get_client(self, model: str):
+        """Get or create the Gemini client."""
+        try:
+            import google.generativeai as genai
+            if self.api_key:
+                genai.configure(api_key=self.api_key)
+            return genai.GenerativeModel(model)
+        except ImportError:
+            raise ImportError(
+                "google-generativeai package not installed. "
+                "Install with: pip install google-generativeai"
+            )
+    def chat_completion(
+        self,
+        model: str = "gemini-2.5-flash",
+        messages: list[dict[str, str]] = None,
+        temperature: float = 0.7,
+        max_tokens: int = 256,
+        **kwargs,
+    ) -> tuple[Any, Trace]:
+        """
+        Create a chat completion with automatic tracing.
+        Args:
+            model: The model to use (e.g., "gemini-2.5-flash")
+            messages: List of messages with role and content
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            **kwargs: Additional parameters
+        Returns:
+            Tuple of (Gemini response, Trace)
+        """
+        messages = messages or []
+        parameters = {
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            **kwargs,
+        }
+        def make_call():
+            client = self._get_client(model)
+            # Convert messages to Gemini format
+            # Gemini uses a different format - combine into a single prompt or use chat
+            contents = []
+            for msg in messages:
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+                # Map roles to Gemini format
+                if role == "system":
+                    # Prepend system message to first user message
+                    contents.append({"role": "user", "parts": [content]})
+                elif role == "assistant":
+                    contents.append({"role": "model", "parts": [content]})
+                else:
+                    contents.append({"role": "user", "parts": [content]})
+            # Create generation config
+            generation_config = {
+                "temperature": temperature,
+                "max_output_tokens": max_tokens,
+            }
+            # Make the call
+            response = client.generate_content(
+                contents,
+                generation_config=generation_config,
+            )
+            return response
+        response, trace = self.capture_layer.capture(
+            provider="gemini",
+            model=model,
+            messages=messages,
+            parameters=parameters,
+            call_fn=make_call,
+        )
+        return response, trace
+    def generate(
+        self,
+        prompt: str,
+        model: str = "gemini-2.5-flash",
+        temperature: float = 0.7,
+        max_tokens: int = 256,
+        **kwargs,
+    ) -> tuple[Any, Trace]:
+        """
+        Simple text generation with a prompt.
+        Args:
+            prompt: The prompt text
+            model: The model to use
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens
+        Returns:
+            Tuple of (response, Trace)
+        """
+        messages = [{"role": "user", "content": prompt}]
+        return self.chat_completion(
+            model=model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            **kwargs,
+        )