semantic-trace 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantic_trace-0.1.0/.gitignore +48 -0
- semantic_trace-0.1.0/CHANGELOG.md +33 -0
- semantic_trace-0.1.0/CONTRIBUTING.md +85 -0
- semantic_trace-0.1.0/LICENSE +21 -0
- semantic_trace-0.1.0/PKG-INFO +251 -0
- semantic_trace-0.1.0/README.md +216 -0
- semantic_trace-0.1.0/examples/demo.jsonl +2 -0
- semantic_trace-0.1.0/examples/drift_detection_demo.py +116 -0
- semantic_trace-0.1.0/examples/minimal_demo.py +82 -0
- semantic_trace-0.1.0/pyproject.toml +78 -0
- semantic_trace-0.1.0/src/agent_trace/__init__.py +78 -0
- semantic_trace-0.1.0/src/agent_trace/cli.py +158 -0
- semantic_trace-0.1.0/src/agent_trace/core/__init__.py +39 -0
- semantic_trace-0.1.0/src/agent_trace/core/schema.py +401 -0
- semantic_trace-0.1.0/src/agent_trace/core/serializer.py +175 -0
- semantic_trace-0.1.0/src/agent_trace/engine/__init__.py +33 -0
- semantic_trace-0.1.0/src/agent_trace/engine/invariants.py +340 -0
- semantic_trace-0.1.0/src/agent_trace/engine/replay.py +173 -0
- semantic_trace-0.1.0/src/agent_trace/integrations/__init__.py +18 -0
- semantic_trace-0.1.0/src/agent_trace/integrations/langgraph.py +213 -0
- semantic_trace-0.1.0/test-judge.jsonl +2 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
build/
|
|
9
|
+
dist/
|
|
10
|
+
*.egg-info/
|
|
11
|
+
*.egg
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
*~
|
|
24
|
+
|
|
25
|
+
# OS
|
|
26
|
+
.DS_Store
|
|
27
|
+
Thumbs.db
|
|
28
|
+
|
|
29
|
+
# Testing
|
|
30
|
+
.pytest_cache/
|
|
31
|
+
.coverage
|
|
32
|
+
htmlcov/
|
|
33
|
+
.tox/
|
|
34
|
+
|
|
35
|
+
# Type checking
|
|
36
|
+
.mypy_cache/
|
|
37
|
+
.dmypy.json
|
|
38
|
+
dmypy.json
|
|
39
|
+
|
|
40
|
+
# Ruff
|
|
41
|
+
.ruff_cache/
|
|
42
|
+
|
|
43
|
+
# Trace output files (uncomment to ignore)
|
|
44
|
+
# traces/
|
|
45
|
+
# *.jsonl
|
|
46
|
+
|
|
47
|
+
# Misc
|
|
48
|
+
=
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-04-07
|
|
11
|
+
|
|
12
|
+
Initial release.
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
- Core Pydantic models: `TraceModel`, `Span`, `IntentInvariant`, `TraceMetadata`
|
|
16
|
+
- `Trace` context manager for simplified capture workflow with auto-serialization
|
|
17
|
+
- `ReplayReport` class with `.summary()`, `.print_violations()`, `.is_clean`, `.pass_rate`
|
|
18
|
+
- `InvariantResult` dataclass for per-invariant check details
|
|
19
|
+
- Enum types: `InvariantType`, `ActionType`
|
|
20
|
+
- JSONL serialization with orjson and file locking
|
|
21
|
+
- Built-in invariant checkers: `SchemaInvariantChecker`, `SubstringInvariantChecker`, `LLMAsJudgeChecker`
|
|
22
|
+
- `BaseInvariantChecker` ABC for custom checkers
|
|
23
|
+
- Mechanical replay (structural validation)
|
|
24
|
+
- Semantic replay (invariant checking)
|
|
25
|
+
- `validate_trace()` for combined mechanical + semantic validation
|
|
26
|
+
- LangGraph callback handler integration
|
|
27
|
+
- CLI with `info`, `validate`, `replay`, `spans` commands and `--json` output
|
|
28
|
+
- Optional extras: `langgraph`, `llm-judge`, `dev`
|
|
29
|
+
- Comprehensive test suite (71 tests)
|
|
30
|
+
- GitHub Actions CI workflow
|
|
31
|
+
|
|
32
|
+
[Unreleased]: https://github.com/sznmelvin/agent-trace/compare/v0.1.0...HEAD
|
|
33
|
+
[0.1.0]: https://github.com/sznmelvin/agent-trace/releases/tag/v0.1.0
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Contributing to agent-trace
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing! agent-trace is built on the belief that AI agent observability should be simple, composable, and open. Every contribution, whether a typo fix, a new checker, or a full integration, is welcome.
|
|
4
|
+
|
|
5
|
+
## How to Contribute
|
|
6
|
+
|
|
7
|
+
### Reporting Bugs
|
|
8
|
+
|
|
9
|
+
- Check the [issue tracker](https://github.com/sznmelvin/agent-trace/issues) to see if it's already reported.
|
|
10
|
+
- Open a new issue with a clear title, description, and minimal reproduction if possible.
|
|
11
|
+
- Include your Python version and the installed `semantic-trace` version (the PyPI distribution name of agent-trace).
|
|
12
|
+
|
|
13
|
+
### Suggesting Features
|
|
14
|
+
|
|
15
|
+
- Open an issue with the label `enhancement`.
|
|
16
|
+
- Describe the problem you're trying to solve, not just the solution you have in mind.
|
|
17
|
+
- Keep scope focused; agent-trace values minimalism.
|
|
18
|
+
|
|
19
|
+
### Pull Requests
|
|
20
|
+
|
|
21
|
+
1. **Fork** the repository and create your branch from `main`.
|
|
22
|
+
2. **Install dev dependencies**:
|
|
23
|
+
```bash
|
|
24
|
+
pip install -e ".[dev]"
|
|
25
|
+
```
|
|
26
|
+
3. **Make your changes** with clear, minimal code and docstrings.
|
|
27
|
+
4. **Add tests** for new functionality. We use pytest.
|
|
28
|
+
5. **Run the checks**:
|
|
29
|
+
```bash
|
|
30
|
+
ruff check src/agent_trace/ tests/ examples/
|
|
31
|
+
ruff format src/agent_trace/ tests/ examples/
|
|
32
|
+
pytest tests/ -v
|
|
33
|
+
```
|
|
34
|
+
6. **Open a PR** with a clear description of what and why.
|
|
35
|
+
|
|
36
|
+
## Code Style
|
|
37
|
+
|
|
38
|
+
- Follow [PEP 8](https://peps.python.org/pep-0008/) conventions.
|
|
39
|
+
- Use [ruff](https://github.com/astral-sh/ruff) for linting and formatting.
|
|
40
|
+
- Line length: 88 characters.
|
|
41
|
+
- Type hints on all public functions and methods.
|
|
42
|
+
- Docstrings on all public classes and functions.
|
|
43
|
+
|
|
44
|
+
## Design Principles
|
|
45
|
+
|
|
46
|
+
1. **Minimalism first**: If it can be one line, don't make it three.
|
|
47
|
+
2. **No hidden magic**: What you see is what you get. No metaclass sorcery.
|
|
48
|
+
3. **Composable**: Small pieces that work well together.
|
|
49
|
+
4. **Backward compatible**: Don't break existing code without a major version bump.
|
|
50
|
+
5. **Zero unnecessary dependencies**: Core is `pydantic` + `orjson`. Everything else is optional.
|
|
51
|
+
|
|
52
|
+
## Development Setup
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Clone your fork
|
|
56
|
+
git clone https://github.com/YOUR_USERNAME/agent-trace.git
|
|
57
|
+
cd agent-trace
|
|
58
|
+
|
|
59
|
+
# Create a virtual environment
|
|
60
|
+
python -m venv .venv
|
|
61
|
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
|
62
|
+
|
|
63
|
+
# Install in editable mode with dev deps
|
|
64
|
+
pip install -e ".[dev]"
|
|
65
|
+
|
|
66
|
+
# Run tests
|
|
67
|
+
pytest tests/ -v
|
|
68
|
+
|
|
69
|
+
# Run linter
|
|
70
|
+
ruff check src/agent_trace/ tests/ examples/
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Adding a New Invariant Checker
|
|
74
|
+
|
|
75
|
+
If you want to contribute a new built-in checker:
|
|
76
|
+
|
|
77
|
+
1. Add the checker class to `src/agent_trace/engine/invariants.py`.
|
|
78
|
+
2. Register it in `CHECKER_REGISTRY` with a new `InvariantType` (or reuse an existing one).
|
|
79
|
+
3. If you introduced a new type, add it to the `InvariantType` enum in `src/agent_trace/core/schema.py`.
|
|
80
|
+
4. Write tests in `tests/test_invariants.py`.
|
|
81
|
+
5. Update the README invariant types table.
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
By contributing, you agree that your contributions will be licensed under the MIT License.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 melvin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: semantic-trace
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Semantic tracing primitive for AI agents
|
|
5
|
+
Project-URL: Homepage, https://github.com/sznmelvin/agent-trace
|
|
6
|
+
Project-URL: Repository, https://github.com/sznmelvin/agent-trace
|
|
7
|
+
Project-URL: Documentation, https://github.com/sznmelvin/agent-trace#readme
|
|
8
|
+
Author: Trace Contributors
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agents,ai,drift-detection,invariants,observability,replay,tracing
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: orjson>=3.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
30
|
+
Provides-Extra: langgraph
|
|
31
|
+
Requires-Dist: langgraph>=0.2.0; extra == 'langgraph'
|
|
32
|
+
Provides-Extra: llm-judge
|
|
33
|
+
Requires-Dist: httpx>=0.27; extra == 'llm-judge'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# agent-trace
|
|
37
|
+
|
|
38
|
+
[PyPI version](https://pypi.org/project/semantic-trace/)
|
|
39
|
+
[Python versions](https://pypi.org/project/semantic-trace/)
|
|
40
|
+
[License: MIT](https://github.com/sznmelvin/agent-trace/blob/main/LICENSE)
|
|
41
|
+
[CI status](https://github.com/sznmelvin/agent-trace/actions/workflows/ci.yml)
|
|
42
|
+
|
|
43
|
+
**Semantic tracing primitive for AI agents.**
|
|
44
|
+
|
|
45
|
+
Intent-anchored execution. Deterministic replay. Runtime drift detection.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Philosophy
|
|
50
|
+
|
|
51
|
+
Existing observability tools log **what** happened: every keystroke, token, and HTTP request. But they don't capture **intent**.
|
|
52
|
+
|
|
53
|
+
agent-trace flips this. Instead of dumping raw logs, you attach *invariants* to your agent's actions:
|
|
54
|
+
|
|
55
|
+
> "This LLM call should return a JSON object with `action` and `params` keys."
|
|
56
|
+
> "This tool output must contain the substring `success`."
|
|
57
|
+
|
|
58
|
+
When the agent runs, those invariants travel with the trace. Later, you replay the trace and check whether every invariant still holds. If a model upgrade, prompt change, or tool regression breaks an invariant, you catch it immediately.
|
|
59
|
+
|
|
60
|
+
**agent-trace is a primitive, not a platform.** No web servers. No databases. No UI frameworks. Just strictly-typed Python data structures and JSONL files you can version-control, diff, and grep.
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install semantic-trace
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
With optional integrations:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install semantic-trace[langgraph] # LangGraph callback handler
|
|
72
|
+
pip install semantic-trace[llm-judge] # LLM-as-Judge invariant checker
|
|
73
|
+
pip install semantic-trace[dev] # pytest + ruff for contributors
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from agent_trace import Trace, IntentInvariant, InvariantType, semantic_replay
|
|
80
|
+
|
|
81
|
+
# 1. Define invariants: what your agent's output MUST satisfy
|
|
82
|
+
invariants = [
|
|
83
|
+
IntentInvariant(
|
|
84
|
+
id="valid-json",
|
|
85
|
+
description="Output must be valid JSON with 'summary' key",
|
|
86
|
+
invariant_type=InvariantType.SUBSTRING_CHECK,
|
|
87
|
+
config={"substring": '"summary"'},
|
|
88
|
+
fidelity_threshold=1.0,
|
|
89
|
+
),
|
|
90
|
+
IntentInvariant(
|
|
91
|
+
id="no-hallucination",
|
|
92
|
+
description="Must not invent fake citations",
|
|
93
|
+
invariant_type=InvariantType.LLM_AS_JUDGE,
|
|
94
|
+
config={"api_key": "your-key", "model": "qwen/qwen3.6-plus:free"},
|
|
95
|
+
fidelity_threshold=0.85,
|
|
96
|
+
),
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
# 2. Capture trace: invariants auto-attach to every span
|
|
100
|
+
with Trace(
|
|
101
|
+
name="research-assistant",
|
|
102
|
+
invariants=invariants,
|
|
103
|
+
output_file="traces/run.jsonl",
|
|
104
|
+
) as trace:
|
|
105
|
+
# Your agent code here (LangGraph, CrewAI, custom, etc.)
|
|
106
|
+
# Spans are captured automatically via integrations
|
|
107
|
+
# or manually:
|
|
108
|
+
from agent_trace import Span, ActionType
|
|
109
|
+
|
|
110
|
+
trace.add_span(Span(
|
|
111
|
+
trace_id=trace.trace_id,
|
|
112
|
+
action_type=ActionType.LLM_CALL,
|
|
113
|
+
input_data={"prompt": "Summarize this document..."},
|
|
114
|
+
output_data={"summary": "The document discusses..."},
|
|
115
|
+
duration_ms=342.0,
|
|
116
|
+
))
|
|
117
|
+
|
|
118
|
+
# 3. Later: replay and check all invariants
|
|
119
|
+
report = semantic_replay("traces/run.jsonl")
|
|
120
|
+
print(report.summary())
|
|
121
|
+
report.print_violations()
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Why agent-trace?
|
|
125
|
+
|
|
126
|
+
| Problem | agent-trace solution |
|
|
127
|
+
|---------|---------------------|
|
|
128
|
+
| Agent behavior changes silently after a model upgrade | Replay old traces with invariants to catch regressions |
|
|
129
|
+
| No way to codify "what good looks like" for agent output | Attach invariants as executable specifications |
|
|
130
|
+
| Observability platforms are expensive and complex | JSONL files you own, version-control, and grep |
|
|
131
|
+
| Testing agents is hard and non-deterministic | Semantic replay checks intent, not exact output |
|
|
132
|
+
|
|
133
|
+
## Invariant Types
|
|
134
|
+
|
|
135
|
+
| Type | What it does | Config |
|
|
136
|
+
|------|-------------|--------|
|
|
137
|
+
| `SUBSTRING_CHECK` | Checks for a target substring in JSON output | `{"substring": "..."}` |
|
|
138
|
+
| `SCHEMA_MATCH` | Validates output against a Pydantic type | `{"schema": dict[str, str]}` |
|
|
139
|
+
| `LLM_AS_JUDGE` | Uses an LLM to semantically evaluate | `{"api_key": "...", "model": "..."}` |
|
|
140
|
+
| `CUSTOM` | Your own checker via `BaseInvariantChecker` | Any |
|
|
141
|
+
|
|
142
|
+
## CLI
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
# Show trace metadata
|
|
146
|
+
trace info traces/run.jsonl
|
|
147
|
+
|
|
148
|
+
# Structural validation
|
|
149
|
+
trace validate traces/run.jsonl
|
|
150
|
+
|
|
151
|
+
# Full semantic replay (mechanical + invariant checks)
|
|
152
|
+
trace replay traces/run.jsonl
|
|
153
|
+
|
|
154
|
+
# List all spans with durations
|
|
155
|
+
trace spans traces/run.jsonl
|
|
156
|
+
|
|
157
|
+
# Machine-readable JSON output
|
|
158
|
+
trace replay traces/run.jsonl --json
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Architecture
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
agent-trace/
|
|
165
|
+
├── src/agent_trace/
|
|
166
|
+
│ ├── __init__.py # Public API re-exports
|
|
167
|
+
│ ├── cli.py # `trace` CLI entry point
|
|
168
|
+
│ ├── core/
|
|
169
|
+
│ │ ├── schema.py # Pydantic models + Trace context manager
|
|
170
|
+
│ │ └── serializer.py # JSONL read/write with orjson + file locking
|
|
171
|
+
│ ├── engine/
|
|
172
|
+
│ │ ├── invariants.py # ABC + built-in checkers
|
|
173
|
+
│ │ └── replay.py # Mechanical + semantic replay
|
|
174
|
+
│ └── integrations/
|
|
175
|
+
│ └── langgraph.py # Lazy-loaded LangGraph callback handler
|
|
176
|
+
├── examples/
|
|
177
|
+
│ ├── minimal_demo.py # Full workflow demo
|
|
178
|
+
│ └── drift_detection_demo.py # Catching regressions with replay
|
|
179
|
+
├── tests/ # Comprehensive test suite
|
|
180
|
+
├── pyproject.toml
|
|
181
|
+
└── README.md
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Examples
|
|
185
|
+
|
|
186
|
+
- [**minimal_demo.py**](examples/minimal_demo.py): Complete workflow: define invariants, capture trace, replay
|
|
187
|
+
- [**drift_detection_demo.py**](examples/drift_detection_demo.py): Simulate a model regression and catch it with semantic replay
|
|
188
|
+
|
|
189
|
+
## Extending
|
|
190
|
+
|
|
191
|
+
### Custom Checker
|
|
192
|
+
|
|
193
|
+
Write your own checker by subclassing `BaseInvariantChecker`:
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from agent_trace import BaseInvariantChecker, Span, IntentInvariant
|
|
197
|
+
|
|
198
|
+
class EmbeddingSimilarityChecker(BaseInvariantChecker):
|
|
199
|
+
def check(self, span: Span, invariant: IntentInvariant) -> float:
|
|
200
|
+
# Your embedding-based logic here
|
|
201
|
+
expected = invariant.config["expected_embedding"]
|
|
202
|
+
actual = get_embedding(span.output_data["text"])
|
|
203
|
+
return cosine_similarity(expected, actual)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### LLM-as-Judge
|
|
207
|
+
|
|
208
|
+
Use an LLM to semantically evaluate whether a span's output satisfies an invariant.
|
|
209
|
+
Requires `pip install semantic-trace[llm-judge]`.
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
from agent_trace import IntentInvariant, InvariantType
|
|
213
|
+
|
|
214
|
+
invariant = IntentInvariant(
|
|
215
|
+
id="quality-check",
|
|
216
|
+
description="The response should be helpful and well-structured",
|
|
217
|
+
invariant_type=InvariantType.LLM_AS_JUDGE,
|
|
218
|
+
config={
|
|
219
|
+
"api_key": "sk-or-your-key",
|
|
220
|
+
"model": "qwen/qwen3.6-plus:free",
|
|
221
|
+
},
|
|
222
|
+
fidelity_threshold=0.7,
|
|
223
|
+
)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
The judge sends the span context to an LLM (default: OpenRouter) and parses a
|
|
227
|
+
structured JSON score. On any failure it returns `0.0` and logs a warning;
|
|
228
|
+
it never crashes your replay pipeline.
|
|
229
|
+
|
|
230
|
+
### LangGraph Integration
|
|
231
|
+
|
|
232
|
+
```python
|
|
233
|
+
from agent_trace.integrations.langgraph import TraceCallbackHandler
|
|
234
|
+
|
|
235
|
+
handler = TraceCallbackHandler(
|
|
236
|
+
trace_file="traces/run.jsonl",
|
|
237
|
+
session_id="session-1",
|
|
238
|
+
agent_name="my-agent",
|
|
239
|
+
default_invariants=[invariant],
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
graph = create_react_agent(...)
graph.invoke({"messages": [...]}, config={"callbacks": [handler]})
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## Contributing
|
|
246
|
+
|
|
247
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, code style, and how to submit PRs.
|
|
248
|
+
|
|
249
|
+
## License
|
|
250
|
+
|
|
251
|
+
MIT
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# agent-trace
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/semantic-trace/)
|
|
4
|
+
[Python versions](https://pypi.org/project/semantic-trace/)
|
|
5
|
+
[License: MIT](https://github.com/sznmelvin/agent-trace/blob/main/LICENSE)
|
|
6
|
+
[CI status](https://github.com/sznmelvin/agent-trace/actions/workflows/ci.yml)
|
|
7
|
+
|
|
8
|
+
**Semantic tracing primitive for AI agents.**
|
|
9
|
+
|
|
10
|
+
Intent-anchored execution. Deterministic replay. Runtime drift detection.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Philosophy
|
|
15
|
+
|
|
16
|
+
Existing observability tools log **what** happened: every keystroke, token, and HTTP request. But they don't capture **intent**.
|
|
17
|
+
|
|
18
|
+
agent-trace flips this. Instead of dumping raw logs, you attach *invariants* to your agent's actions:
|
|
19
|
+
|
|
20
|
+
> "This LLM call should return a JSON object with `action` and `params` keys."
|
|
21
|
+
> "This tool output must contain the substring `success`."
|
|
22
|
+
|
|
23
|
+
When the agent runs, those invariants travel with the trace. Later, you replay the trace and check whether every invariant still holds. If a model upgrade, prompt change, or tool regression breaks an invariant, you catch it immediately.
|
|
24
|
+
|
|
25
|
+
**agent-trace is a primitive, not a platform.** No web servers. No databases. No UI frameworks. Just strictly-typed Python data structures and JSONL files you can version-control, diff, and grep.
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install semantic-trace
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
With optional integrations:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install semantic-trace[langgraph] # LangGraph callback handler
|
|
37
|
+
pip install semantic-trace[llm-judge] # LLM-as-Judge invariant checker
|
|
38
|
+
pip install semantic-trace[dev] # pytest + ruff for contributors
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from agent_trace import Trace, IntentInvariant, InvariantType, semantic_replay
|
|
45
|
+
|
|
46
|
+
# 1. Define invariants: what your agent's output MUST satisfy
|
|
47
|
+
invariants = [
|
|
48
|
+
IntentInvariant(
|
|
49
|
+
id="valid-json",
|
|
50
|
+
description="Output must be valid JSON with 'summary' key",
|
|
51
|
+
invariant_type=InvariantType.SUBSTRING_CHECK,
|
|
52
|
+
config={"substring": '"summary"'},
|
|
53
|
+
fidelity_threshold=1.0,
|
|
54
|
+
),
|
|
55
|
+
IntentInvariant(
|
|
56
|
+
id="no-hallucination",
|
|
57
|
+
description="Must not invent fake citations",
|
|
58
|
+
invariant_type=InvariantType.LLM_AS_JUDGE,
|
|
59
|
+
config={"api_key": "your-key", "model": "qwen/qwen3.6-plus:free"},
|
|
60
|
+
fidelity_threshold=0.85,
|
|
61
|
+
),
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
# 2. Capture trace: invariants auto-attach to every span
|
|
65
|
+
with Trace(
|
|
66
|
+
name="research-assistant",
|
|
67
|
+
invariants=invariants,
|
|
68
|
+
output_file="traces/run.jsonl",
|
|
69
|
+
) as trace:
|
|
70
|
+
# Your agent code here (LangGraph, CrewAI, custom, etc.)
|
|
71
|
+
# Spans are captured automatically via integrations
|
|
72
|
+
# or manually:
|
|
73
|
+
from agent_trace import Span, ActionType
|
|
74
|
+
|
|
75
|
+
trace.add_span(Span(
|
|
76
|
+
trace_id=trace.trace_id,
|
|
77
|
+
action_type=ActionType.LLM_CALL,
|
|
78
|
+
input_data={"prompt": "Summarize this document..."},
|
|
79
|
+
output_data={"summary": "The document discusses..."},
|
|
80
|
+
duration_ms=342.0,
|
|
81
|
+
))
|
|
82
|
+
|
|
83
|
+
# 3. Later: replay and check all invariants
|
|
84
|
+
report = semantic_replay("traces/run.jsonl")
|
|
85
|
+
print(report.summary())
|
|
86
|
+
report.print_violations()
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Why agent-trace?
|
|
90
|
+
|
|
91
|
+
| Problem | agent-trace solution |
|
|
92
|
+
|---------|---------------------|
|
|
93
|
+
| Agent behavior changes silently after a model upgrade | Replay old traces with invariants to catch regressions |
|
|
94
|
+
| No way to codify "what good looks like" for agent output | Attach invariants as executable specifications |
|
|
95
|
+
| Observability platforms are expensive and complex | JSONL files you own, version-control, and grep |
|
|
96
|
+
| Testing agents is hard and non-deterministic | Semantic replay checks intent, not exact output |
|
|
97
|
+
|
|
98
|
+
## Invariant Types
|
|
99
|
+
|
|
100
|
+
| Type | What it does | Config |
|
|
101
|
+
|------|-------------|--------|
|
|
102
|
+
| `SUBSTRING_CHECK` | Checks for a target substring in JSON output | `{"substring": "..."}` |
|
|
103
|
+
| `SCHEMA_MATCH` | Validates output against a Pydantic type | `{"schema": dict[str, str]}` |
|
|
104
|
+
| `LLM_AS_JUDGE` | Uses an LLM to semantically evaluate | `{"api_key": "...", "model": "..."}` |
|
|
105
|
+
| `CUSTOM` | Your own checker via `BaseInvariantChecker` | Any |
|
|
106
|
+
|
|
107
|
+
## CLI
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Show trace metadata
|
|
111
|
+
trace info traces/run.jsonl
|
|
112
|
+
|
|
113
|
+
# Structural validation
|
|
114
|
+
trace validate traces/run.jsonl
|
|
115
|
+
|
|
116
|
+
# Full semantic replay (mechanical + invariant checks)
|
|
117
|
+
trace replay traces/run.jsonl
|
|
118
|
+
|
|
119
|
+
# List all spans with durations
|
|
120
|
+
trace spans traces/run.jsonl
|
|
121
|
+
|
|
122
|
+
# Machine-readable JSON output
|
|
123
|
+
trace replay traces/run.jsonl --json
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Architecture
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
agent-trace/
|
|
130
|
+
├── src/agent_trace/
|
|
131
|
+
│ ├── __init__.py # Public API re-exports
|
|
132
|
+
│ ├── cli.py # `trace` CLI entry point
|
|
133
|
+
│ ├── core/
|
|
134
|
+
│ │ ├── schema.py # Pydantic models + Trace context manager
|
|
135
|
+
│ │ └── serializer.py # JSONL read/write with orjson + file locking
|
|
136
|
+
│ ├── engine/
|
|
137
|
+
│ │ ├── invariants.py # ABC + built-in checkers
|
|
138
|
+
│ │ └── replay.py # Mechanical + semantic replay
|
|
139
|
+
│ └── integrations/
|
|
140
|
+
│ └── langgraph.py # Lazy-loaded LangGraph callback handler
|
|
141
|
+
├── examples/
|
|
142
|
+
│ ├── minimal_demo.py # Full workflow demo
|
|
143
|
+
│ └── drift_detection_demo.py # Catching regressions with replay
|
|
144
|
+
├── tests/ # Comprehensive test suite
|
|
145
|
+
├── pyproject.toml
|
|
146
|
+
└── README.md
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Examples
|
|
150
|
+
|
|
151
|
+
- [**minimal_demo.py**](examples/minimal_demo.py): Complete workflow: define invariants, capture trace, replay
|
|
152
|
+
- [**drift_detection_demo.py**](examples/drift_detection_demo.py): Simulate a model regression and catch it with semantic replay
|
|
153
|
+
|
|
154
|
+
## Extending
|
|
155
|
+
|
|
156
|
+
### Custom Checker
|
|
157
|
+
|
|
158
|
+
Write your own checker by subclassing `BaseInvariantChecker`:
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from agent_trace import BaseInvariantChecker, Span, IntentInvariant
|
|
162
|
+
|
|
163
|
+
class EmbeddingSimilarityChecker(BaseInvariantChecker):
|
|
164
|
+
def check(self, span: Span, invariant: IntentInvariant) -> float:
|
|
165
|
+
# Your embedding-based logic here
|
|
166
|
+
expected = invariant.config["expected_embedding"]
|
|
167
|
+
actual = get_embedding(span.output_data["text"])
|
|
168
|
+
return cosine_similarity(expected, actual)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### LLM-as-Judge
|
|
172
|
+
|
|
173
|
+
Use an LLM to semantically evaluate whether a span's output satisfies an invariant.
|
|
174
|
+
Requires `pip install semantic-trace[llm-judge]`.
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from agent_trace import IntentInvariant, InvariantType
|
|
178
|
+
|
|
179
|
+
invariant = IntentInvariant(
|
|
180
|
+
id="quality-check",
|
|
181
|
+
description="The response should be helpful and well-structured",
|
|
182
|
+
invariant_type=InvariantType.LLM_AS_JUDGE,
|
|
183
|
+
config={
|
|
184
|
+
"api_key": "sk-or-your-key",
|
|
185
|
+
"model": "qwen/qwen3.6-plus:free",
|
|
186
|
+
},
|
|
187
|
+
fidelity_threshold=0.7,
|
|
188
|
+
)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
The judge sends the span context to an LLM (default: OpenRouter) and parses a
|
|
192
|
+
structured JSON score. On any failure it returns `0.0` and logs a warning;
|
|
193
|
+
it never crashes your replay pipeline.
|
|
194
|
+
|
|
195
|
+
### LangGraph Integration
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
from agent_trace.integrations.langgraph import TraceCallbackHandler
|
|
199
|
+
|
|
200
|
+
handler = TraceCallbackHandler(
|
|
201
|
+
trace_file="traces/run.jsonl",
|
|
202
|
+
session_id="session-1",
|
|
203
|
+
agent_name="my-agent",
|
|
204
|
+
default_invariants=[invariant],
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
graph = create_react_agent(...)
graph.invoke({"messages": [...]}, config={"callbacks": [handler]})
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
## Contributing
|
|
211
|
+
|
|
212
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, code style, and how to submit PRs.
|
|
213
|
+
|
|
214
|
+
## License
|
|
215
|
+
|
|
216
|
+
MIT
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"__metadata__":{"trace_id":"c32a5679-f8fe-4f65-99d3-256109898037","session_id":"demo","agent_name":"my-agent","start_time":"2026-04-07T12:02:51.293579Z","end_time":null}}
|
|
2
|
+
{"span_id":"95b605f0-aad5-4d92-b8c4-fc583c62e62d","parent_id":null,"trace_id":"c32a5679-f8fe-4f65-99d3-256109898037","timestamp":"2026-04-07T12:02:51.293737Z","action_type":"llm_call","input_data":{"prompt":"Summarize this"},"output_data":{"summary":"Here is the summary"},"duration_ms":342.0,"attached_invariants":[{"id":"has-summary","description":"Output must contain a summary","invariant_type":"SUBSTRING_CHECK","config":{"substring":"summary"},"fidelity_threshold":1.0}],"invariant_results":null}
|