phylax 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. phylax-1.0.0/LICENSE +21 -0
  2. phylax-1.0.0/PKG-INFO +196 -0
  3. phylax-1.0.0/README.md +156 -0
  4. phylax-1.0.0/phylax/__init__.py +44 -0
  5. phylax-1.0.0/phylax/_internal/__init__.py +6 -0
  6. phylax-1.0.0/phylax/_internal/adapters/__init__.py +1 -0
  7. phylax-1.0.0/phylax/_internal/adapters/gemini.py +154 -0
  8. phylax-1.0.0/phylax/_internal/adapters/llama.py +157 -0
  9. phylax-1.0.0/phylax/_internal/adapters/openai.py +149 -0
  10. phylax-1.0.0/phylax/_internal/capture.py +276 -0
  11. phylax-1.0.0/phylax/_internal/context.py +111 -0
  12. phylax-1.0.0/phylax/_internal/decorator.py +252 -0
  13. phylax-1.0.0/phylax/_internal/expectations/__init__.py +1 -0
  14. phylax-1.0.0/phylax/_internal/expectations/evaluator.py +134 -0
  15. phylax-1.0.0/phylax/_internal/expectations/rules.py +216 -0
  16. phylax-1.0.0/phylax/_internal/graph.py +831 -0
  17. phylax-1.0.0/phylax/_internal/schema.py +148 -0
  18. phylax-1.0.0/phylax/cli/__init__.py +1 -0
  19. phylax-1.0.0/phylax/cli/main.py +515 -0
  20. phylax-1.0.0/phylax/server/__init__.py +1 -0
  21. phylax-1.0.0/phylax/server/main.py +76 -0
  22. phylax-1.0.0/phylax/server/routes/__init__.py +1 -0
  23. phylax-1.0.0/phylax/server/routes/chat.py +172 -0
  24. phylax-1.0.0/phylax/server/routes/replay.py +247 -0
  25. phylax-1.0.0/phylax/server/routes/traces.py +300 -0
  26. phylax-1.0.0/phylax/server/storage/__init__.py +1 -0
  27. phylax-1.0.0/phylax/server/storage/files.py +392 -0
  28. phylax-1.0.0/phylax/server/storage/sqlite.py +206 -0
  29. phylax-1.0.0/phylax.egg-info/PKG-INFO +196 -0
  30. phylax-1.0.0/phylax.egg-info/SOURCES.txt +38 -0
  31. phylax-1.0.0/phylax.egg-info/dependency_links.txt +1 -0
  32. phylax-1.0.0/phylax.egg-info/entry_points.txt +2 -0
  33. phylax-1.0.0/phylax.egg-info/requires.txt +21 -0
  34. phylax-1.0.0/phylax.egg-info/top_level.txt +1 -0
  35. phylax-1.0.0/pyproject.toml +62 -0
  36. phylax-1.0.0/setup.cfg +4 -0
  37. phylax-1.0.0/tests/test_context.py +121 -0
  38. phylax-1.0.0/tests/test_contract.py +269 -0
  39. phylax-1.0.0/tests/test_expectations.py +196 -0
  40. phylax-1.0.0/tests/test_schema.py +211 -0
phylax-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mohit Manglani
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
phylax-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,196 @@
1
+ Metadata-Version: 2.4
2
+ Name: phylax
3
+ Version: 1.0.0
4
+ Summary: Deterministic regression enforcement for LLM systems.
5
+ Author: Phylax Team
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/xXMohitXx/Phylax
8
+ Project-URL: Documentation, https://github.com/xXMohitXx/Phylax#readme
9
+ Project-URL: Repository, https://github.com/xXMohitXx/Phylax
10
+ Keywords: llm,testing,regression,ci,deterministic,tracing
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Testing
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: pydantic>=2.5.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Provides-Extra: server
26
+ Requires-Dist: fastapi>=0.109.0; extra == "server"
27
+ Requires-Dist: uvicorn[standard]>=0.27.0; extra == "server"
28
+ Provides-Extra: openai
29
+ Requires-Dist: openai>=1.0.0; extra == "openai"
30
+ Provides-Extra: google
31
+ Requires-Dist: google-generativeai>=0.8.0; extra == "google"
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
34
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
35
+ Requires-Dist: httpx>=0.26.0; extra == "dev"
36
+ Requires-Dist: requests>=2.31.0; extra == "dev"
37
+ Provides-Extra: all
38
+ Requires-Dist: phylax[google,openai,server]; extra == "all"
39
+ Dynamic: license-file
40
+
41
+ <p align="center">
42
+ <img src="https://raw.githubusercontent.com/xXMohitXx/Phylax/main/assets/logo/phylax_logo.png" alt="Phylax Logo" width="200">
43
+ </p>
44
+
45
+ # Phylax
46
+
47
+ **Deterministic regression enforcement for LLM systems.**
48
+
49
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
50
+ [![PyPI version](https://img.shields.io/pypi/v/phylax.svg)](https://pypi.org/project/phylax/)
51
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
52
+
53
+ ---
54
+
55
+ ## The Problem
56
+
57
+ LLM outputs change unexpectedly. Same prompt, different model version → different behavior.
58
+ Without Phylax, you discover this **in production**.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ pip install phylax
64
+ ```
65
+
66
+ For server/UI support:
67
+ ```bash
68
+ pip install phylax[server]
69
+ ```
70
+
71
+ For all LLM providers:
72
+ ```bash
73
+ pip install phylax[all]
74
+ ```
75
+
76
+ ## Quick Start
77
+
78
+ ```python
79
+ from phylax import trace, expect, execution
80
+
81
+ @trace(provider="gemini")
82
+ @expect(must_include=["refund"], max_latency_ms=1500)
83
+ def customer_reply(query):
84
+ return llm.generate(query)
85
+
86
+ # Track multi-step agent flows
87
+ with execution("customer-support-flow"):
88
+ result = customer_reply("I want a refund")
89
+ ```
90
+
91
+ ```bash
92
+ # Mark a known-good response as baseline
93
+ phylax bless <trace_id>
94
+
95
+ # In CI: fail if output regresses
96
+ phylax check # exits 1 on failure
97
+ ```
98
+
99
+ That's it. Your CI now blocks LLM regressions.
100
+
101
+ ---
102
+
103
+ ## What Phylax is NOT
104
+
105
+ - ❌ **Not monitoring** — no metrics, no dashboards
106
+ - ❌ **Not observability** — no traces-to-cloud, no analytics
107
+ - ❌ **Not AI judgment** — rules are deterministic, not LLM-based
108
+ - ❌ **Not cloud-dependent** — runs entirely locally
109
+ - ❌ **Not prompt engineering** — tests outputs, not prompts
110
+
111
+ Phylax is a **test framework**. It tells you when LLM behavior changes.
112
+
113
+ ---
114
+
115
+ ## CI Integration
116
+
117
+ ```yaml
118
+ # .github/workflows/phylax.yml
119
+ - run: phylax check
120
+ env:
121
+ GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
122
+ ```
123
+
124
+ **Exit codes:**
125
+ - `0` — All golden traces pass
126
+ - `1` — Regression detected
127
+
128
+ ---
129
+
130
+ ## Expectations (Deterministic Rules)
131
+
132
+ ```python
133
+ @expect(
134
+ must_include=["word"], # Required content
135
+ must_not_include=["sorry"], # Forbidden content
136
+ max_latency_ms=2000, # Performance gate
137
+ min_tokens=10 # Minimum length
138
+ )
139
+ ```
140
+
141
+ All rules are deterministic. No AI judgment. No ambiguity.
142
+
143
+ ---
144
+
145
+ ## Commands
146
+
147
+ | Command | What it does |
148
+ |---------|--------------|
149
+ | `phylax init` | Initialize config |
150
+ | `phylax server` | Start API server |
151
+ | `phylax list` | List traces |
152
+ | `phylax list --failed` | Show only failed traces |
153
+ | `phylax show <id>` | Show trace details |
154
+ | `phylax replay <id>` | Re-run a trace |
155
+ | `phylax bless <id>` | Mark as golden baseline |
156
+ | `phylax check` | CI regression check |
157
+
158
+ ---
159
+
160
+ ## Features
161
+
162
+ | Feature | Description |
163
+ |---------|-------------|
164
+ | **Trace Capture** | Record every LLM call automatically |
165
+ | **Expectations** | Define PASS/FAIL rules (4 deterministic rules) |
166
+ | **Golden Traces** | Baseline comparisons with hash verification |
167
+ | **CI Integration** | `phylax check` exits 1 on regression |
168
+ | **Execution Graphs** | Visualize multi-step agent workflows |
169
+ | **Forensics Mode** | Debug failures with guided investigation |
170
+
171
+ ---
172
+
173
+ ## Stability Guarantee
174
+
175
+ Phylax v1.0.0 is **API-frozen**:
176
+
177
+ - No breaking changes in v1.x
178
+ - `trace`, `expect`, `execution` are stable
179
+ - Exit codes are stable
180
+ - Schema is stable
181
+
182
+ See [docs/contract.md](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md) for full guarantees.
183
+
184
+ ---
185
+
186
+ ## Documentation
187
+
188
+ - [Quickstart](https://github.com/xXMohitXx/Phylax/blob/main/docs/quickstart.md)
189
+ - [Mental Model](https://github.com/xXMohitXx/Phylax/blob/main/docs/mental-model.md)
190
+ - [API Contract](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md)
191
+
192
+ ---
193
+
194
+ ## License
195
+
196
+ MIT License
phylax-1.0.0/README.md ADDED
@@ -0,0 +1,156 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/xXMohitXx/Phylax/main/assets/logo/phylax_logo.png" alt="Phylax Logo" width="200">
3
+ </p>
4
+
5
+ # Phylax
6
+
7
+ **Deterministic regression enforcement for LLM systems.**
8
+
9
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
10
+ [![PyPI version](https://img.shields.io/pypi/v/phylax.svg)](https://pypi.org/project/phylax/)
11
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
12
+
13
+ ---
14
+
15
+ ## The Problem
16
+
17
+ LLM outputs change unexpectedly. Same prompt, different model version → different behavior.
18
+ Without Phylax, you discover this **in production**.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install phylax
24
+ ```
25
+
26
+ For server/UI support:
27
+ ```bash
28
+ pip install phylax[server]
29
+ ```
30
+
31
+ For all LLM providers:
32
+ ```bash
33
+ pip install phylax[all]
34
+ ```
35
+
36
+ ## Quick Start
37
+
38
+ ```python
39
+ from phylax import trace, expect, execution
40
+
41
+ @trace(provider="gemini")
42
+ @expect(must_include=["refund"], max_latency_ms=1500)
43
+ def customer_reply(query):
44
+ return llm.generate(query)
45
+
46
+ # Track multi-step agent flows
47
+ with execution("customer-support-flow"):
48
+ result = customer_reply("I want a refund")
49
+ ```
50
+
51
+ ```bash
52
+ # Mark a known-good response as baseline
53
+ phylax bless <trace_id>
54
+
55
+ # In CI: fail if output regresses
56
+ phylax check # exits 1 on failure
57
+ ```
58
+
59
+ That's it. Your CI now blocks LLM regressions.
60
+
61
+ ---
62
+
63
+ ## What Phylax is NOT
64
+
65
+ - ❌ **Not monitoring** — no metrics, no dashboards
66
+ - ❌ **Not observability** — no traces-to-cloud, no analytics
67
+ - ❌ **Not AI judgment** — rules are deterministic, not LLM-based
68
+ - ❌ **Not cloud-dependent** — runs entirely locally
69
+ - ❌ **Not prompt engineering** — tests outputs, not prompts
70
+
71
+ Phylax is a **test framework**. It tells you when LLM behavior changes.
72
+
73
+ ---
74
+
75
+ ## CI Integration
76
+
77
+ ```yaml
78
+ # .github/workflows/phylax.yml
79
+ - run: phylax check
80
+ env:
81
+ GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
82
+ ```
83
+
84
+ **Exit codes:**
85
+ - `0` — All golden traces pass
86
+ - `1` — Regression detected
87
+
88
+ ---
89
+
90
+ ## Expectations (Deterministic Rules)
91
+
92
+ ```python
93
+ @expect(
94
+ must_include=["word"], # Required content
95
+ must_not_include=["sorry"], # Forbidden content
96
+ max_latency_ms=2000, # Performance gate
97
+ min_tokens=10 # Minimum length
98
+ )
99
+ ```
100
+
101
+ All rules are deterministic. No AI judgment. No ambiguity.
102
+
103
+ ---
104
+
105
+ ## Commands
106
+
107
+ | Command | What it does |
108
+ |---------|--------------|
109
+ | `phylax init` | Initialize config |
110
+ | `phylax server` | Start API server |
111
+ | `phylax list` | List traces |
112
+ | `phylax list --failed` | Show only failed traces |
113
+ | `phylax show <id>` | Show trace details |
114
+ | `phylax replay <id>` | Re-run a trace |
115
+ | `phylax bless <id>` | Mark as golden baseline |
116
+ | `phylax check` | CI regression check |
117
+
118
+ ---
119
+
120
+ ## Features
121
+
122
+ | Feature | Description |
123
+ |---------|-------------|
124
+ | **Trace Capture** | Record every LLM call automatically |
125
+ | **Expectations** | Define PASS/FAIL rules (4 deterministic rules) |
126
+ | **Golden Traces** | Baseline comparisons with hash verification |
127
+ | **CI Integration** | `phylax check` exits 1 on regression |
128
+ | **Execution Graphs** | Visualize multi-step agent workflows |
129
+ | **Forensics Mode** | Debug failures with guided investigation |
130
+
131
+ ---
132
+
133
+ ## Stability Guarantee
134
+
135
+ Phylax v1.0.0 is **API-frozen**:
136
+
137
+ - No breaking changes in v1.x
138
+ - `trace`, `expect`, `execution` are stable
139
+ - Exit codes are stable
140
+ - Schema is stable
141
+
142
+ See [docs/contract.md](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md) for full guarantees.
143
+
144
+ ---
145
+
146
+ ## Documentation
147
+
148
+ - [Quickstart](https://github.com/xXMohitXx/Phylax/blob/main/docs/quickstart.md)
149
+ - [Mental Model](https://github.com/xXMohitXx/Phylax/blob/main/docs/mental-model.md)
150
+ - [API Contract](https://github.com/xXMohitXx/Phylax/blob/main/docs/contract.md)
151
+
152
+ ---
153
+
154
+ ## License
155
+
156
+ MIT License
@@ -0,0 +1,44 @@
1
+ """
2
+ Phylax - Deterministic regression enforcement for LLM systems.
3
+
4
+ Public API:
5
+ trace - Decorator to trace LLM calls
6
+ expect - Decorator to add expectations
7
+ execution - Context manager for grouping traces
8
+ Trace - Trace data model
9
+ Verdict - Verdict enum (PASS, FAIL, TAINTED)
10
+ """
11
+
12
+ from phylax._internal.schema import (
13
+ Trace,
14
+ TraceRequest,
15
+ TraceResponse,
16
+ TraceRuntime,
17
+ Verdict,
18
+ )
19
+ from phylax._internal.decorator import trace, expect
20
+ from phylax._internal.context import execution
21
+ from phylax._internal.graph import ExecutionGraph, NodeRole, GraphStage, GraphDiff, NodeDiff
22
+
23
+ __version__ = "1.0.0"
24
+ __all__ = [
25
+ # Core decorators
26
+ "trace",
27
+ "expect",
28
+ # Context manager
29
+ "execution",
30
+ # Data models
31
+ "Trace",
32
+ "TraceRequest",
33
+ "TraceResponse",
34
+ "TraceRuntime",
35
+ "Verdict",
36
+ # Graph (advanced)
37
+ "ExecutionGraph",
38
+ "NodeRole",
39
+ "GraphStage",
40
+ "GraphDiff",
41
+ "NodeDiff",
42
+ # Version
43
+ "__version__",
44
+ ]
@@ -0,0 +1,6 @@
1
+ """
2
+ Phylax internal modules.
3
+
4
+ These are implementation details and should not be imported directly.
5
+ Use the public API from `phylax` instead.
6
+ """
@@ -0,0 +1 @@
1
+ """Phylax internal adapters for LLM providers."""
@@ -0,0 +1,154 @@
1
+ """
2
+ Gemini Adapter
3
+
4
+ Provides integration with Google's Gemini API.
5
+ """
6
+
7
+ from typing import Any, Optional
8
+
9
+ from phylax._internal.capture import CaptureLayer, get_capture_layer
10
+ from phylax._internal.schema import Trace
11
+
12
+
13
class GeminiAdapter:
    """
    Adapter for Google Gemini API.

    Sends chat-style messages to a Gemini model through the
    ``google.generativeai`` SDK and routes every call through a capture
    layer, so each request returns both the SDK response and a ``Trace``.

    Usage:
        adapter = GeminiAdapter(api_key="your-key")
        response, trace = adapter.chat_completion(
            model="gemini-2.5-flash",
            messages=[{"role": "user", "content": "Hello!"}]
        )
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        capture_layer: Optional[CaptureLayer] = None,
    ):
        """
        Initialize the Gemini adapter.

        Args:
            api_key: Optional API key. When omitted, the adapter never calls
                ``genai.configure`` and credential lookup is left to the SDK
                (presumably the GOOGLE_API_KEY env var -- TODO confirm
                against the SDK documentation).
            capture_layer: Optional custom capture layer; defaults to the one
                returned by ``get_capture_layer()``.
        """
        self.api_key = api_key
        self.capture_layer = capture_layer or get_capture_layer()
        # Kept for backward compatibility; nothing in this class reads it.
        self._client = None

    def _get_client(self, model: str):
        """
        Build a ``GenerativeModel`` for ``model``.

        A new model object is created on every call; nothing is cached.

        Args:
            model: Name of the Gemini model to instantiate.

        Returns:
            The ``genai.GenerativeModel`` instance for ``model``.

        Raises:
            ImportError: If ``google.generativeai`` cannot be imported. The
                underlying import failure is chained as ``__cause__``.
        """
        # Only the import lives inside the try block, so errors raised by the
        # SDK calls below are not misreported as a missing package.
        try:
            import google.generativeai as genai
        except ImportError as err:
            raise ImportError(
                "google-generativeai package not installed. "
                "Install with: pip install google-generativeai"
            ) from err

        if self.api_key:
            genai.configure(api_key=self.api_key)

        return genai.GenerativeModel(model)

    def chat_completion(
        self,
        model: str = "gemini-2.5-flash",
        messages: Optional[list[dict[str, str]]] = None,
        temperature: float = 0.7,
        max_tokens: int = 256,
        **kwargs,
    ) -> tuple[Any, Trace]:
        """
        Create a chat completion with automatic tracing.

        Args:
            model: The model to use (e.g., "gemini-2.5-flash")
            messages: List of messages with role and content; ``None`` is
                treated as an empty list
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate (sent to the SDK as
                ``max_output_tokens``)
            **kwargs: Additional parameters. They are included in the
                ``parameters`` dict handed to the capture layer.
                NOTE(review): they are not forwarded to ``generate_content``
                -- confirm whether that is intended.

        Returns:
            Tuple of (Gemini response, Trace)
        """
        messages = messages or []
        parameters = {
            "temperature": temperature,
            "max_tokens": max_tokens,
            **kwargs,
        }

        def make_call():
            client = self._get_client(model)

            # Convert messages to the role/parts layout passed to the SDK.
            # "assistant" turns become "model"; every other role, including
            # "system", is sent as its own "user" turn.
            contents = [
                {
                    "role": (
                        "model"
                        if msg.get("role", "user") == "assistant"
                        else "user"
                    ),
                    "parts": [msg.get("content", "")],
                }
                for msg in messages
            ]

            generation_config = {
                "temperature": temperature,
                "max_output_tokens": max_tokens,
            }

            return client.generate_content(
                contents,
                generation_config=generation_config,
            )

        # The capture layer invokes make_call and pairs its result with a Trace.
        response, trace = self.capture_layer.capture(
            provider="gemini",
            model=model,
            messages=messages,
            parameters=parameters,
            call_fn=make_call,
        )

        return response, trace

    def generate(
        self,
        prompt: str,
        model: str = "gemini-2.5-flash",
        temperature: float = 0.7,
        max_tokens: int = 256,
        **kwargs,
    ) -> tuple[Any, Trace]:
        """
        Simple text generation with a prompt.

        Wraps ``prompt`` in a single user message and delegates to
        ``chat_completion``.

        Args:
            prompt: The prompt text
            model: The model to use
            temperature: Sampling temperature
            max_tokens: Maximum tokens
            **kwargs: Passed through unchanged to ``chat_completion``

        Returns:
            Tuple of (response, Trace)
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat_completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            **kwargs,
        )