llm-leash 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_leash-1.3.0/.claude/agent-memory/project-auditor/MEMORY.md +1 -0
- llm_leash-1.3.0/.claude/agent-memory/project-auditor/project_agent_guard.md +17 -0
- llm_leash-1.3.0/.github/workflows/ci.yml +30 -0
- llm_leash-1.3.0/.github/workflows/publish.yml +60 -0
- llm_leash-1.3.0/.gitignore +39 -0
- llm_leash-1.3.0/.great_cto/.auditor-probe +0 -0
- llm_leash-1.3.0/.great_cto/PROJECT.md +48 -0
- llm_leash-1.3.0/AGENTS.md +84 -0
- llm_leash-1.3.0/API.md +250 -0
- llm_leash-1.3.0/ARCHITECTURE.md +497 -0
- llm_leash-1.3.0/CHANGELOG.md +150 -0
- llm_leash-1.3.0/LICENSE +21 -0
- llm_leash-1.3.0/PKG-INFO +256 -0
- llm_leash-1.3.0/PRODUCT.md +230 -0
- llm_leash-1.3.0/README.md +195 -0
- llm_leash-1.3.0/demo.py +79 -0
- llm_leash-1.3.0/docs/SOC2.md +108 -0
- llm_leash-1.3.0/docs/TESTING.md +78 -0
- llm_leash-1.3.0/docs/adr/ADR-001-python-first.md +36 -0
- llm_leash-1.3.0/docs/superpowers/plans/2026-05-16-llm-leash-v0.1-green.md +2480 -0
- llm_leash-1.3.0/docs/superpowers/plans/2026-05-16-llm-leash-v0.2-policy.md +1286 -0
- llm_leash-1.3.0/docs/superpowers/plans/2026-05-16-llm-leash-v0.3-sql-shell.md +548 -0
- llm_leash-1.3.0/docs/superpowers/plans/2026-05-16-llm-leash-v0.4-redis.md +575 -0
- llm_leash-1.3.0/docs/superpowers/plans/2026-05-16-llm-leash-v0.5-hitl.md +759 -0
- llm_leash-1.3.0/docs/superpowers/plans/2026-05-16-llm-leash-v1.3-soc2.md +320 -0
- llm_leash-1.3.0/pyproject.toml +113 -0
- llm_leash-1.3.0/scripts/release_check.sh +47 -0
- llm_leash-1.3.0/src/llm_leash/__init__.py +83 -0
- llm_leash-1.3.0/src/llm_leash/adapters/__init__.py +4 -0
- llm_leash-1.3.0/src/llm_leash/adapters/anthropic.py +208 -0
- llm_leash-1.3.0/src/llm_leash/adapters/crewai.py +212 -0
- llm_leash-1.3.0/src/llm_leash/adapters/langgraph.py +240 -0
- llm_leash-1.3.0/src/llm_leash/adapters/mcp.py +136 -0
- llm_leash-1.3.0/src/llm_leash/adapters/openai.py +205 -0
- llm_leash-1.3.0/src/llm_leash/adapters/openhands.py +209 -0
- llm_leash-1.3.0/src/llm_leash/adapters/pydantic_ai.py +207 -0
- llm_leash-1.3.0/src/llm_leash/audit/__init__.py +16 -0
- llm_leash-1.3.0/src/llm_leash/audit/replay.py +80 -0
- llm_leash-1.3.0/src/llm_leash/audit/schema.py +90 -0
- llm_leash-1.3.0/src/llm_leash/audit/soc2.py +542 -0
- llm_leash-1.3.0/src/llm_leash/audit/verify.py +58 -0
- llm_leash-1.3.0/src/llm_leash/audit/writer.py +87 -0
- llm_leash-1.3.0/src/llm_leash/budget/__init__.py +19 -0
- llm_leash-1.3.0/src/llm_leash/budget/pricing.py +64 -0
- llm_leash-1.3.0/src/llm_leash/budget/redis_store.py +48 -0
- llm_leash-1.3.0/src/llm_leash/budget/sqlite_store.py +90 -0
- llm_leash-1.3.0/src/llm_leash/budget/store.py +43 -0
- llm_leash-1.3.0/src/llm_leash/budget/tracker.py +88 -0
- llm_leash-1.3.0/src/llm_leash/cli.py +165 -0
- llm_leash-1.3.0/src/llm_leash/firewall.py +199 -0
- llm_leash-1.3.0/src/llm_leash/hitl/__init__.py +21 -0
- llm_leash-1.3.0/src/llm_leash/hitl/gate.py +41 -0
- llm_leash-1.3.0/src/llm_leash/hitl/queue.py +300 -0
- llm_leash-1.3.0/src/llm_leash/hitl/resume.py +34 -0
- llm_leash-1.3.0/src/llm_leash/hitl/webhook.py +85 -0
- llm_leash-1.3.0/src/llm_leash/kill/__init__.py +13 -0
- llm_leash-1.3.0/src/llm_leash/kill/redis_registry.py +53 -0
- llm_leash-1.3.0/src/llm_leash/kill/registry.py +43 -0
- llm_leash-1.3.0/src/llm_leash/kill/sqlite_registry.py +84 -0
- llm_leash-1.3.0/src/llm_leash/kill/transport_http.py +87 -0
- llm_leash-1.3.0/src/llm_leash/kill/transport_inproc.py +0 -0
- llm_leash-1.3.0/src/llm_leash/kill/transport_redis.py +0 -0
- llm_leash-1.3.0/src/llm_leash/policy/__init__.py +16 -0
- llm_leash-1.3.0/src/llm_leash/policy/engine.py +24 -0
- llm_leash-1.3.0/src/llm_leash/policy/external.py +158 -0
- llm_leash-1.3.0/src/llm_leash/policy/pii.py +54 -0
- llm_leash-1.3.0/src/llm_leash/policy/predicates.py +235 -0
- llm_leash-1.3.0/src/llm_leash/types.py +87 -0
- llm_leash-1.3.0/tests/__init__.py +0 -0
- llm_leash-1.3.0/tests/adapters/__init__.py +0 -0
- llm_leash-1.3.0/tests/adapters/test_anthropic.py +135 -0
- llm_leash-1.3.0/tests/adapters/test_crewai.py +165 -0
- llm_leash-1.3.0/tests/adapters/test_langgraph.py +227 -0
- llm_leash-1.3.0/tests/adapters/test_mcp.py +182 -0
- llm_leash-1.3.0/tests/adapters/test_openai.py +197 -0
- llm_leash-1.3.0/tests/adapters/test_openhands.py +123 -0
- llm_leash-1.3.0/tests/adapters/test_pydantic_ai.py +168 -0
- llm_leash-1.3.0/tests/audit/__init__.py +0 -0
- llm_leash-1.3.0/tests/audit/test_replay_export.py +145 -0
- llm_leash-1.3.0/tests/audit/test_schema.py +52 -0
- llm_leash-1.3.0/tests/audit/test_soc2.py +333 -0
- llm_leash-1.3.0/tests/audit/test_verify.py +69 -0
- llm_leash-1.3.0/tests/audit/test_writer.py +77 -0
- llm_leash-1.3.0/tests/audit/test_writer_property.py +78 -0
- llm_leash-1.3.0/tests/budget/__init__.py +0 -0
- llm_leash-1.3.0/tests/budget/test_pricing.py +58 -0
- llm_leash-1.3.0/tests/budget/test_redis_store.py +87 -0
- llm_leash-1.3.0/tests/budget/test_sqlite_store.py +65 -0
- llm_leash-1.3.0/tests/budget/test_store.py +50 -0
- llm_leash-1.3.0/tests/budget/test_tracker.py +61 -0
- llm_leash-1.3.0/tests/hitl/__init__.py +0 -0
- llm_leash-1.3.0/tests/hitl/test_gate.py +63 -0
- llm_leash-1.3.0/tests/hitl/test_queue.py +196 -0
- llm_leash-1.3.0/tests/hitl/test_resume.py +45 -0
- llm_leash-1.3.0/tests/hitl/test_webhook.py +104 -0
- llm_leash-1.3.0/tests/kill/__init__.py +0 -0
- llm_leash-1.3.0/tests/kill/test_redis_registry.py +82 -0
- llm_leash-1.3.0/tests/kill/test_registry.py +41 -0
- llm_leash-1.3.0/tests/kill/test_sqlite_registry.py +64 -0
- llm_leash-1.3.0/tests/kill/test_transport_http.py +125 -0
- llm_leash-1.3.0/tests/policy/__init__.py +0 -0
- llm_leash-1.3.0/tests/policy/test_engine.py +67 -0
- llm_leash-1.3.0/tests/policy/test_external.py +178 -0
- llm_leash-1.3.0/tests/policy/test_pii.py +76 -0
- llm_leash-1.3.0/tests/policy/test_predicates.py +319 -0
- llm_leash-1.3.0/tests/smoke/__init__.py +0 -0
- llm_leash-1.3.0/tests/smoke/test_demo.py +35 -0
- llm_leash-1.3.0/tests/test_cli.py +175 -0
- llm_leash-1.3.0/tests/test_demo.py +34 -0
- llm_leash-1.3.0/tests/test_firewall_integration.py +288 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
- [agent-guard project context](project_agent_guard.md) — pre-alpha OSS Python library for LLM agent safety (budget caps, audit log, kill switch, HITL); 0 tests, 0 CI, most modules are empty stubs as of 2026-05-16
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: agent-guard project context
|
|
3
|
+
description: Core facts about the agent-guard repo — pre-alpha OSS library for LLM agent safety (budget caps, audit log, kill switch, HITL)
|
|
4
|
+
type: project
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
agent-guard is a pre-alpha Python library (v0.0.1) that provides runtime enforcement for LLM agents: hard USD budget caps, append-only hash-chained JSONL audit log, kill switch, and human-in-the-loop webhook. One import, one class: `from agent_guard import Firewall`.
|
|
8
|
+
|
|
9
|
+
**Why:** Targets the uncontested B2B layer — cost enforcement and compliance evidence — explicitly not competing with content-safety scanners (LlamaFirewall, Invariant, NeMo Guardrails).
|
|
10
|
+
|
|
11
|
+
**How to apply:** When suggesting implementations, enforce the core constraints: zero runtime deps for core (stdlib only), adapters ≤300 LOC each, async-first sync-compatible, JSONL audit format is a wire contract. The Firewall facade surface (3 properties, 3 methods, 1 constructor) is locked in API.md.
|
|
12
|
+
|
|
13
|
+
Current state as of 2026-05-16:
|
|
14
|
+
- firewall.py and types.py have ~229 LOC of real code; all other modules (budget, audit, kill, hitl, policy, adapters, cli) are 0-LOC stubs.
|
|
15
|
+
- 0 tests. No CI. No CVE scanner.
|
|
16
|
+
- 5 P0 tasks, 5 P1 tasks, 4 P2 tasks in Beads (prefix: dreamy-swirles-89f100).
|
|
17
|
+
- Next milestone: v0.1 — core firewall + Anthropic/OpenAI adapters + CLI (replay, verify).
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, "claude/**"]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
release-check:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
cache: pip
|
|
23
|
+
|
|
24
|
+
- name: Install
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Release acceptance gate
|
|
30
|
+
run: bash scripts/release_check.sh
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*.*.*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
acceptance:
|
|
10
|
+
name: Re-run acceptance gate
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.12"
|
|
17
|
+
cache: pip
|
|
18
|
+
- name: Install
|
|
19
|
+
run: |
|
|
20
|
+
python -m pip install --upgrade pip
|
|
21
|
+
pip install -e ".[dev]"
|
|
22
|
+
- name: Release acceptance gate
|
|
23
|
+
run: bash scripts/release_check.sh
|
|
24
|
+
|
|
25
|
+
build-and-publish:
|
|
26
|
+
name: Build & publish
|
|
27
|
+
needs: acceptance
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
permissions:
|
|
30
|
+
id-token: write # OIDC trusted-publisher to PyPI
|
|
31
|
+
contents: read
|
|
32
|
+
environment:
|
|
33
|
+
name: pypi
|
|
34
|
+
url: https://pypi.org/p/llm-leash
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v4
|
|
37
|
+
|
|
38
|
+
- uses: actions/setup-python@v5
|
|
39
|
+
with:
|
|
40
|
+
python-version: "3.12"
|
|
41
|
+
|
|
42
|
+
- name: Install build tooling
|
|
43
|
+
run: |
|
|
44
|
+
python -m pip install --upgrade pip
|
|
45
|
+
pip install build
|
|
46
|
+
|
|
47
|
+
- name: Verify tag matches package version
|
|
48
|
+
run: |
|
|
49
|
+
PKG_VERSION=$(python -c "import tomllib; print(tomllib.loads(open('pyproject.toml','rb').read().decode())['project']['version'])")
|
|
50
|
+
TAG_VERSION="${GITHUB_REF#refs/tags/v}"
|
|
51
|
+
if [ "$PKG_VERSION" != "$TAG_VERSION" ]; then
|
|
52
|
+
echo "::error::Tag v$TAG_VERSION does not match pyproject version $PKG_VERSION"
|
|
53
|
+
exit 1
|
|
54
|
+
fi
|
|
55
|
+
|
|
56
|
+
- name: Build sdist and wheel
|
|
57
|
+
run: python -m build
|
|
58
|
+
|
|
59
|
+
- name: Publish to PyPI (trusted publisher / OIDC)
|
|
60
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.venv/
|
|
6
|
+
venv/
|
|
7
|
+
.eggs/
|
|
8
|
+
build/
|
|
9
|
+
dist/
|
|
10
|
+
|
|
11
|
+
# Test/Coverage
|
|
12
|
+
.coverage
|
|
13
|
+
.coverage.*
|
|
14
|
+
htmlcov/
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.mypy_cache/
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
.hypothesis/
|
|
19
|
+
|
|
20
|
+
# IDE
|
|
21
|
+
.vscode/
|
|
22
|
+
.idea/
|
|
23
|
+
*.swp
|
|
24
|
+
|
|
25
|
+
# OS
|
|
26
|
+
.DS_Store
|
|
27
|
+
|
|
28
|
+
# Local audit logs created by examples / tests
|
|
29
|
+
*.jsonl
|
|
30
|
+
!docs/**/*.jsonl
|
|
31
|
+
!examples/**/sample-*.jsonl
|
|
32
|
+
|
|
33
|
+
# Secrets
|
|
34
|
+
.env
|
|
35
|
+
.env.*
|
|
36
|
+
!.env.example
|
|
37
|
+
|
|
38
|
+
# Hatchling
|
|
39
|
+
.hatch/
|
|
File without changes
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# llm-leash
|
|
2
|
+
|
|
3
|
+
## Type
|
|
4
|
+
primary: library-sdk
|
|
5
|
+
secondary: agent-platform
|
|
6
|
+
archetype: devtools
|
|
7
|
+
approval-level: gates-only
|
|
8
|
+
review_mode: auto
|
|
9
|
+
|
|
10
|
+
## Stack
|
|
11
|
+
- python: 3.11.8 (supports 3.11–3.13)
|
|
12
|
+
- build-backend: hatchling>=1.21
|
|
13
|
+
- test: pytest>=8.0, pytest-asyncio>=0.24, hypothesis>=6.100, vcrpy>=6.0
|
|
14
|
+
- lint: ruff>=0.6
|
|
15
|
+
- types: mypy>=1.10 (strict)
|
|
16
|
+
- optional-anthropic: anthropic>=0.40
|
|
17
|
+
- optional-openai: openai>=1.50
|
|
18
|
+
- optional-langgraph: langgraph>=0.2
|
|
19
|
+
- optional-crewai: crewai>=0.80
|
|
20
|
+
- optional-redis: redis>=5.0
|
|
21
|
+
- infra: none (library, no server infra)
|
|
22
|
+
|
|
23
|
+
## Domain
|
|
24
|
+
agent-safety, llm-cost-control, audit-compliance, kill-switch, human-in-the-loop
|
|
25
|
+
|
|
26
|
+
## Compliance signals
|
|
27
|
+
- EU AI Act Article 12 (audit log evidence)
|
|
28
|
+
- SOC 2 Type II (planned v1.0)
|
|
29
|
+
- Zero runtime deps core (non-negotiable per PRODUCT.md)
|
|
30
|
+
|
|
31
|
+
## Env
|
|
32
|
+
- no runtime env vars required for core
|
|
33
|
+
- AUDIT_LOG_PATH: optional, path to JSONL audit file
|
|
34
|
+
- REDIS_URL: optional, for RedisStore / RedisKillRegistry
|
|
35
|
+
|
|
36
|
+
## L3
|
|
37
|
+
p0-threshold: test_suite_failures > 0
|
|
38
|
+
p1-threshold: mypy_errors > 0
|
|
39
|
+
error-log: (no server — library; errors raised as LeashBlocked / LeashKilled)
|
|
40
|
+
|
|
41
|
+
## Team
|
|
42
|
+
size: 1
|
|
43
|
+
authors: avelikiy@users.noreply.github.com
|
|
44
|
+
|
|
45
|
+
## Last Audit
|
|
46
|
+
date: 2026-05-16
|
|
47
|
+
findings: P0:5 P1:5 P2:4 P3:0
|
|
48
|
+
next-audit: 2026-08-14
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Agent Instructions
|
|
2
|
+
|
|
3
|
+
This project uses **bd** (beads) for issue tracking. Run `bd onboard` to get started.
|
|
4
|
+
|
|
5
|
+
## Quick Reference
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
bd ready # Find available work
|
|
9
|
+
bd show <id> # View issue details
|
|
10
|
+
bd update <id> --claim # Claim work atomically
|
|
11
|
+
bd close <id> # Complete work
|
|
12
|
+
bd dolt push # Push beads data to remote
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Non-Interactive Shell Commands
|
|
16
|
+
|
|
17
|
+
**ALWAYS use non-interactive flags** with file operations to avoid hanging on confirmation prompts.
|
|
18
|
+
|
|
19
|
+
Shell commands like `cp`, `mv`, and `rm` may be aliased to include `-i` (interactive) mode on some systems, causing the agent to hang indefinitely waiting for y/n input.
|
|
20
|
+
|
|
21
|
+
**Use these forms instead:**
|
|
22
|
+
```bash
|
|
23
|
+
# Force overwrite without prompting
|
|
24
|
+
cp -f source dest # NOT: cp source dest
|
|
25
|
+
mv -f source dest # NOT: mv source dest
|
|
26
|
+
rm -f file # NOT: rm file
|
|
27
|
+
|
|
28
|
+
# For recursive operations
|
|
29
|
+
rm -rf directory # NOT: rm -r directory
|
|
30
|
+
cp -rf source dest # NOT: cp -r source dest
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
**Other commands that may prompt:**
|
|
34
|
+
- `scp` - use `-o BatchMode=yes` for non-interactive
|
|
35
|
+
- `ssh` - use `-o BatchMode=yes` to fail instead of prompting
|
|
36
|
+
- `apt-get` - use `-y` flag
|
|
37
|
+
- `brew` - use `HOMEBREW_NO_AUTO_UPDATE=1` env var
|
|
38
|
+
|
|
39
|
+
<!-- BEGIN BEADS INTEGRATION v:1 profile:minimal hash:ca08a54f -->
|
|
40
|
+
## Beads Issue Tracker
|
|
41
|
+
|
|
42
|
+
This project uses **bd (beads)** for issue tracking. Run `bd prime` to see full workflow context and commands.
|
|
43
|
+
|
|
44
|
+
### Quick Reference
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
bd ready # Find available work
|
|
48
|
+
bd show <id> # View issue details
|
|
49
|
+
bd update <id> --claim # Claim work
|
|
50
|
+
bd close <id> # Complete work
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Rules
|
|
54
|
+
|
|
55
|
+
- Use `bd` for ALL task tracking — do NOT use TodoWrite, TaskCreate, or markdown TODO lists
|
|
56
|
+
- Run `bd prime` for detailed command reference and session close protocol
|
|
57
|
+
- Use `bd remember` for persistent knowledge — do NOT use MEMORY.md files
|
|
58
|
+
|
|
59
|
+
## Session Completion
|
|
60
|
+
|
|
61
|
+
**When ending a work session**, you MUST complete ALL steps below. Work is NOT complete until `git push` succeeds.
|
|
62
|
+
|
|
63
|
+
**MANDATORY WORKFLOW:**
|
|
64
|
+
|
|
65
|
+
1. **File issues for remaining work** - Create issues for anything that needs follow-up
|
|
66
|
+
2. **Run quality gates** (if code changed) - Tests, linters, builds
|
|
67
|
+
3. **Update issue status** - Close finished work, update in-progress items
|
|
68
|
+
4. **PUSH TO REMOTE** - This is MANDATORY:
|
|
69
|
+
```bash
|
|
70
|
+
git pull --rebase
|
|
71
|
+
bd dolt push
|
|
72
|
+
git push
|
|
73
|
+
git status # MUST show "up to date with origin"
|
|
74
|
+
```
|
|
75
|
+
5. **Clean up** - Clear stashes, prune remote branches
|
|
76
|
+
6. **Verify** - All changes committed AND pushed
|
|
77
|
+
7. **Hand off** - Provide context for next session
|
|
78
|
+
|
|
79
|
+
**CRITICAL RULES:**
|
|
80
|
+
- Work is NOT complete until `git push` succeeds
|
|
81
|
+
- NEVER stop before pushing - that leaves work stranded locally
|
|
82
|
+
- NEVER say "ready to push when you are" - YOU must push
|
|
83
|
+
- If push fails, resolve and retry until it succeeds
|
|
84
|
+
<!-- END BEADS INTEGRATION -->
|
llm_leash-1.3.0/API.md
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# API Reference
|
|
2
|
+
|
|
3
|
+
Stable, semver-guaranteed surface for `llm-leash` v1.x.
|
|
4
|
+
|
|
5
|
+
Anything imported from `llm_leash` at the top level is **stable**: breaking
|
|
6
|
+
changes require a major-version bump. Everything else (`llm_leash.adapters.*`,
|
|
7
|
+
`llm_leash.audit.*`, `llm_leash.policy.*`, `llm_leash.budget.*`, `llm_leash.kill.*`) is **public but versioned at the
|
|
8
|
+
minor level**; new methods and parameters can land in minor releases, but
|
|
9
|
+
existing signatures will not change without a deprecation cycle.
|
|
10
|
+
|
|
11
|
+
## Top-level imports
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from llm_leash import (
|
|
15
|
+
Firewall, # facade
|
|
16
|
+
# exception types
|
|
17
|
+
LeashKilled, LeashBlocked, LeashRejectedByHuman,
|
|
18
|
+
# rule types
|
|
19
|
+
Rule, Decision, Action, PolicyContext, ToolCall, PolicyEngine,
|
|
20
|
+
# built-in rules
|
|
21
|
+
BlockedPatterns, BlockedShell, BlockedSql, BudgetThreshold,
|
|
22
|
+
HitlThreshold, PiiRedactor,
|
|
23
|
+
# HITL
|
|
24
|
+
HitlGate, InMemoryHitlGate, WebhookHitlGate,
|
|
25
|
+
# backing stores
|
|
26
|
+
RedisBudgetStore, RedisKillRegistry,
|
|
27
|
+
SQLiteBudgetStore, SQLiteKillRegistry,
|
|
28
|
+
)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## `Firewall`
|
|
32
|
+
|
|
33
|
+
The single entry point.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
Firewall(
|
|
37
|
+
*,
|
|
38
|
+
budget_usd: float | None = None,
|
|
39
|
+
budget_soft_usd: float | None = None,
|
|
40
|
+
audit_log: str | None = None,
|
|
41
|
+
kill_registry: KillRegistry | None = None,
|
|
42
|
+
session_id: str | None = None,
|
|
43
|
+
tenant_id: str | None = None,
|
|
44
|
+
rules: list[Rule] | None = None,
|
|
45
|
+
pii_redactor: PiiRedactor | None = None,
|
|
46
|
+
hitl_gate: HitlGate | None = None,
|
|
47
|
+
)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Methods
|
|
51
|
+
|
|
52
|
+
| Method | Returns | Description |
|
|
53
|
+
|---|---|---|
|
|
54
|
+
| `wrap(client)` | wrapped client | Dispatches to the right adapter by client shape |
|
|
55
|
+
| `await kill(reason)` | `None` | Marks the session killed; next call raises `LeashKilled` |
|
|
56
|
+
| `await cumulative_usd()` | `float` | Total spend for this session so far |
|
|
57
|
+
| `await aclose()` | `None` | Flushes audit log |
|
|
58
|
+
|
|
59
|
+
### Adapter dispatch
|
|
60
|
+
|
|
61
|
+
`fw.wrap(client)` detects the client shape and chooses an adapter:
|
|
62
|
+
|
|
63
|
+
| Client shape | Adapter |
|
|
64
|
+
|---|---|
|
|
65
|
+
| `client.messages.create(...)` | **Anthropic** |
|
|
66
|
+
| `client.chat.completions.create(...)` | **OpenAI** |
|
|
67
|
+
| `client.invoke(messages)` + `.model_name` or `.model` | **LangChain / LangGraph** |
|
|
68
|
+
| `client.call_tool(name, args)` | **MCP** |
|
|
69
|
+
| `client.call(messages)` + `.model` | **CrewAI** |
|
|
70
|
+
| Unknown | pass-through (no wrapping) |
|
|
71
|
+
|
|
72
|
+
The wrapped client preserves the original surface — all unrelated attributes
|
|
73
|
+
delegate to the underlying object.
|
|
74
|
+
|
|
75
|
+
## Exceptions
|
|
76
|
+
|
|
77
|
+
| Exception | When |
|
|
78
|
+
|---|---|
|
|
79
|
+
| `LeashKilled` | Hard budget cap exceeded **or** kill switch tripped |
|
|
80
|
+
| `LeashBlocked` | A policy rule returned `action="block"` |
|
|
81
|
+
| `LeashRejectedByHuman` | A HITL gate denied (explicitly or by timeout) |
|
|
82
|
+
|
|
83
|
+
Each carries `rule_id: str` and `reason: str`.
|
|
84
|
+
|
|
85
|
+
## Rules
|
|
86
|
+
|
|
87
|
+
A `Rule` is any object implementing the protocol:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
class MyRule:
|
|
91
|
+
id: str = "my_rule"
|
|
92
|
+
def evaluate(self, call: ToolCall, ctx: PolicyContext) -> Decision | None: ...
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Return `None` to abstain; return `Decision(action=..., rule_id=..., reason=...)`
|
|
96
|
+
to act. The engine evaluates rules in order; the **first non-`None`** decision
|
|
97
|
+
wins.
|
|
98
|
+
|
|
99
|
+
`Decision.action` is one of: `"allow"`, `"block"`, `"warn"`, `"hitl"`.
|
|
100
|
+
|
|
101
|
+
### Built-in rules
|
|
102
|
+
|
|
103
|
+
| Rule | Purpose |
|
|
104
|
+
|---|---|
|
|
105
|
+
| `BlockedPatterns([regex, ...])` | Block requests whose stringified args match any pattern |
|
|
106
|
+
| `BlockedSql(mode="read_only" \| "no_ddl", extra_deny=[...])` | SQL keyword denylist |
|
|
107
|
+
| `BlockedShell()` | 10-pattern shell command blocklist (rm -rf, curl\|bash, etc.) |
|
|
108
|
+
| `BudgetThreshold(threshold_pct=0.8)` | Emit `warn` decisions as the cap approaches |
|
|
109
|
+
| `HitlThreshold(threshold_pct=0.8)` | Trigger HITL approval as the cap approaches |
|
|
110
|
+
| `PiiRedactor()` | Not a rule — passed via `pii_redactor=` to scrub args |
|
|
111
|
+
|
|
112
|
+
### Custom rule example
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from llm_leash import Decision, Firewall
|
|
116
|
+
|
|
117
|
+
class DenyToolByName:
|
|
118
|
+
id = "deny_tool"
|
|
119
|
+
def __init__(self, names: set[str]) -> None:
|
|
120
|
+
self._names = names
|
|
121
|
+
def evaluate(self, call, ctx):
|
|
122
|
+
if call.tool in self._names:
|
|
123
|
+
return Decision(action="block", rule_id=self.id,
|
|
124
|
+
reason=f"tool {call.tool} is on the denylist")
|
|
125
|
+
return None
|
|
126
|
+
|
|
127
|
+
fw = Firewall(rules=[DenyToolByName({"shell_exec", "delete_file"})])
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Human-in-the-loop
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
class HitlGate(Protocol):
|
|
134
|
+
async def request(self, call: ToolCall, decision: Decision) -> None: ...
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
If `request()` returns normally, the tool call is **approved**. To reject, raise
|
|
138
|
+
`LeashRejectedByHuman`.
|
|
139
|
+
|
|
140
|
+
Built-in implementations:
|
|
141
|
+
|
|
142
|
+
- **`InMemoryHitlGate(default="approve" | "reject")`** — fixtures and tests.
|
|
143
|
+
- **`WebhookHitlGate(url, timeout_secs=60, poll_interval_secs=2)`** —
|
|
144
|
+
`POST {url}/requests` then poll `GET {url}/requests/{id}`. Stdlib `urllib`,
|
|
145
|
+
zero deps.
|
|
146
|
+
|
|
147
|
+
If a rule returns `Decision(action="hitl")` and **no gate is configured**,
|
|
148
|
+
the call is **default-denied** with `LeashRejectedByHuman`.
|
|
149
|
+
|
|
150
|
+
## Audit log
|
|
151
|
+
|
|
152
|
+
Every model call and tool call writes one JSON line, sha256-chained:
|
|
153
|
+
|
|
154
|
+
```jsonc
|
|
155
|
+
{
|
|
156
|
+
"kind": "model_call",
|
|
157
|
+
"ts": "2026-05-16T12:34:56.789Z",
|
|
158
|
+
"session_id": "s_…", "tenant_id": null,
|
|
159
|
+
"seq": 42, "prev_hash": "9af…", "hash": "0c1…",
|
|
160
|
+
"provider": "anthropic", "model": "claude-opus-4-7",
|
|
161
|
+
"input_tokens": 1024, "output_tokens": 256,
|
|
162
|
+
"cost_usd": 0.1234,
|
|
163
|
+
"request_hash": "…", "response_hash": "…"
|
|
164
|
+
}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Event kinds: `model_call`, `tool_call`, `budget`, `kill`.
|
|
168
|
+
|
|
169
|
+
### CLI
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
llm-leash verify audit.jsonl # exits 0 if chain is intact, 2 if broken
|
|
173
|
+
llm-leash replay audit.jsonl # prints events as JSON lines
|
|
174
|
+
llm-leash export audit.jsonl --format csv # CSV with stable column order
|
|
175
|
+
llm-leash export audit.jsonl --format json # full JSON array
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Programmatic access
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from llm_leash.audit.replay import replay, export_csv, export_json
|
|
182
|
+
from llm_leash.audit.verify import verify_chain
|
|
183
|
+
|
|
184
|
+
verify_chain("audit.jsonl") # → int (event count); raises ChainBroken
|
|
185
|
+
list(replay("audit.jsonl")) # iterator of event dicts
|
|
186
|
+
export_csv("audit.jsonl", sys.stdout) # CSV writer
|
|
187
|
+
export_json("audit.jsonl", sys.stdout) # JSON-array writer
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Backing stores
|
|
191
|
+
|
|
192
|
+
By default, budget and kill state live in-process and are lost on restart.
|
|
193
|
+
For multi-worker production, swap in a persistent backend:
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from llm_leash import Firewall, SQLiteBudgetStore, SQLiteKillRegistry
|
|
197
|
+
|
|
198
|
+
budget_store = SQLiteBudgetStore("/var/lib/myapp/budget.db")
|
|
199
|
+
kill_registry = SQLiteKillRegistry("/var/lib/myapp/kill.db")
|
|
200
|
+
|
|
201
|
+
fw = Firewall(budget_usd=100.0, kill_registry=kill_registry)
|
|
202
|
+
fw._budget._store = budget_store # explicit swap; constructor wiring lands in v1.1
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Redis equivalents — `RedisBudgetStore(redis_client)` / `RedisKillRegistry(redis_client)`
|
|
206
|
+
— accept any duck-typed Redis client (e.g. `redis.Redis`, `fakeredis.FakeRedis`).
|
|
207
|
+
|
|
208
|
+
## Type definitions
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
@dataclass(frozen=True)
|
|
212
|
+
class ToolCall:
|
|
213
|
+
tool: str
|
|
214
|
+
args: dict[str, Any]
|
|
215
|
+
session_id: str
|
|
216
|
+
tenant_id: str | None
|
|
217
|
+
|
|
218
|
+
@dataclass(frozen=True)
|
|
219
|
+
class PolicyContext:
|
|
220
|
+
cumulative_usd: float
|
|
221
|
+
budget_cap_usd: float | None
|
|
222
|
+
soft_cap_usd: float | None
|
|
223
|
+
tenant_id: str | None
|
|
224
|
+
|
|
225
|
+
@dataclass(frozen=True)
|
|
226
|
+
class Decision:
|
|
227
|
+
action: Action # "allow" | "block" | "warn" | "hitl"
|
|
228
|
+
rule_id: str
|
|
229
|
+
reason: str
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Version
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
import llm_leash
|
|
236
|
+
llm_leash.__version__ # → "1.3.0"
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Stability promise
|
|
240
|
+
|
|
241
|
+
From v1.0:
|
|
242
|
+
- The top-level exports listed above are **semver-stable**.
|
|
243
|
+
- The audit JSONL schema is **stable**: existing fields will not be removed
|
|
244
|
+
or change meaning. New fields may be added.
|
|
245
|
+
- The CLI sub-commands `verify`, `replay`, `export` are **stable**.
|
|
246
|
+
- Breaking changes in any of the above require a **major-version** bump.
|
|
247
|
+
- Anything in `llm_leash.adapters.*`, `llm_leash.audit.*`, `llm_leash.policy.*`,
|
|
248
|
+
`llm_leash.budget.*`, `llm_leash.kill.*` is **public but versioned at the
|
|
249
|
+
minor level** — back-compat across minor releases unless a deprecation
|
|
250
|
+
warning was emitted in the prior minor.
|