brooder 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brooder-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +23 -0
- brooder-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +15 -0
- brooder-0.1.0/.github/dependabot.yml +23 -0
- brooder-0.1.0/.github/pull_request_template.md +11 -0
- brooder-0.1.0/.github/workflows/ci.yml +76 -0
- brooder-0.1.0/.github/workflows/release.yml +55 -0
- brooder-0.1.0/.gitignore +85 -0
- brooder-0.1.0/.pre-commit-config.yaml +28 -0
- brooder-0.1.0/CHANGELOG.md +96 -0
- brooder-0.1.0/CONTRIBUTING.md +64 -0
- brooder-0.1.0/DCO +34 -0
- brooder-0.1.0/LICENSE +201 -0
- brooder-0.1.0/LICENSING.md +108 -0
- brooder-0.1.0/NOTICE +7 -0
- brooder-0.1.0/PKG-INFO +338 -0
- brooder-0.1.0/README.md +291 -0
- brooder-0.1.0/ROADMAP.md +134 -0
- brooder-0.1.0/SECURITY.md +14 -0
- brooder-0.1.0/TRADEMARKS.md +27 -0
- brooder-0.1.0/action.yml +100 -0
- brooder-0.1.0/assets/banner.svg +21 -0
- brooder-0.1.0/assets/demo.svg +21 -0
- brooder-0.1.0/assets/record-demo.sh +24 -0
- brooder-0.1.0/design/framework-adapters.md +295 -0
- brooder-0.1.0/design/trajectory.md +142 -0
- brooder-0.1.0/docs/api.md +45 -0
- brooder-0.1.0/docs/index.md +48 -0
- brooder-0.1.0/examples/flaky_agent.py +33 -0
- brooder-0.1.0/examples/github-action.yml +28 -0
- brooder-0.1.0/examples/loop_agent.py +60 -0
- brooder-0.1.0/examples/regressing_agent.py +65 -0
- brooder-0.1.0/examples/stable_agent.py +36 -0
- brooder-0.1.0/mkdocs.yml +50 -0
- brooder-0.1.0/pyproject.toml +121 -0
- brooder-0.1.0/src/brooder/__init__.py +31 -0
- brooder-0.1.0/src/brooder/analysis.py +79 -0
- brooder-0.1.0/src/brooder/cli.py +281 -0
- brooder-0.1.0/src/brooder/config.py +88 -0
- brooder-0.1.0/src/brooder/diffing.py +217 -0
- brooder-0.1.0/src/brooder/errors.py +31 -0
- brooder-0.1.0/src/brooder/integrations/__init__.py +75 -0
- brooder-0.1.0/src/brooder/integrations/anthropic.py +46 -0
- brooder-0.1.0/src/brooder/integrations/base.py +170 -0
- brooder-0.1.0/src/brooder/integrations/bedrock.py +49 -0
- brooder-0.1.0/src/brooder/integrations/claude_agent.py +164 -0
- brooder-0.1.0/src/brooder/integrations/google.py +61 -0
- brooder-0.1.0/src/brooder/integrations/langchain.py +321 -0
- brooder-0.1.0/src/brooder/integrations/openai.py +43 -0
- brooder-0.1.0/src/brooder/integrations/openai_agents.py +208 -0
- brooder-0.1.0/src/brooder/integrations/otel.py +216 -0
- brooder-0.1.0/src/brooder/judges.py +109 -0
- brooder-0.1.0/src/brooder/log.py +33 -0
- brooder-0.1.0/src/brooder/metrics.py +116 -0
- brooder-0.1.0/src/brooder/models.py +148 -0
- brooder-0.1.0/src/brooder/py.typed +1 -0
- brooder-0.1.0/src/brooder/recorder.py +342 -0
- brooder-0.1.0/src/brooder/report.py +261 -0
- brooder-0.1.0/src/brooder/storage.py +150 -0
- brooder-0.1.0/tests/test_action.py +57 -0
- brooder-0.1.0/tests/test_analysis.py +54 -0
- brooder-0.1.0/tests/test_async_capture.py +209 -0
- brooder-0.1.0/tests/test_capture_core.py +168 -0
- brooder-0.1.0/tests/test_claude_agent.py +174 -0
- brooder-0.1.0/tests/test_cli.py +194 -0
- brooder-0.1.0/tests/test_config.py +44 -0
- brooder-0.1.0/tests/test_diffing.py +54 -0
- brooder-0.1.0/tests/test_integrations.py +202 -0
- brooder-0.1.0/tests/test_judges.py +31 -0
- brooder-0.1.0/tests/test_langchain.py +192 -0
- brooder-0.1.0/tests/test_metrics.py +126 -0
- brooder-0.1.0/tests/test_openai_agents.py +246 -0
- brooder-0.1.0/tests/test_otel.py +203 -0
- brooder-0.1.0/tests/test_output.py +99 -0
- brooder-0.1.0/tests/test_recorder.py +97 -0
- brooder-0.1.0/tests/test_storage.py +39 -0
- brooder-0.1.0/tests/test_trajectory.py +23 -0
- brooder-0.1.0/tests/test_trajectory_diff.py +180 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug report
|
|
3
|
+
about: Something isn't working as expected
|
|
4
|
+
title: "[bug] "
|
|
5
|
+
labels: bug
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
**What happened**
|
|
9
|
+
A clear description of the bug.
|
|
10
|
+
|
|
11
|
+
**To reproduce**
|
|
12
|
+
Steps / minimal agent script + the exact `brooder` command.
|
|
13
|
+
|
|
14
|
+
**Expected behavior**
|
|
15
|
+
What you expected instead.
|
|
16
|
+
|
|
17
|
+
**Environment**
|
|
18
|
+
- brooder version (`brooder --version`):
|
|
19
|
+
- Python version:
|
|
20
|
+
- OS:
|
|
21
|
+
|
|
22
|
+
**Diff / output**
|
|
23
|
+
Paste the relevant `brooder` output (use `-v` for debug logs).
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature request
|
|
3
|
+
about: Suggest an idea for Brooder
|
|
4
|
+
title: "[feat] "
|
|
5
|
+
labels: enhancement
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
**Problem**
|
|
9
|
+
What are you trying to do that Brooder doesn't support today?
|
|
10
|
+
|
|
11
|
+
**Proposed solution**
|
|
12
|
+
What you'd like to see (a command, a config option, an integration…).
|
|
13
|
+
|
|
14
|
+
**Alternatives**
|
|
15
|
+
Anything you've tried or considered.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
# Keep the GitHub Actions used by ci.yml / release.yml / the composite action current.
|
|
4
|
+
# Grouped into a single PR so Dependabot doesn't open one PR per action.
|
|
5
|
+
- package-ecosystem: github-actions
|
|
6
|
+
directory: "/"
|
|
7
|
+
schedule:
|
|
8
|
+
interval: weekly
|
|
9
|
+
commit-message:
|
|
10
|
+
prefix: ci
|
|
11
|
+
groups:
|
|
12
|
+
actions:
|
|
13
|
+
patterns: ["*"]
|
|
14
|
+
# Keep Python dependencies current (grouped to avoid PR noise).
|
|
15
|
+
- package-ecosystem: pip
|
|
16
|
+
directory: "/"
|
|
17
|
+
schedule:
|
|
18
|
+
interval: weekly
|
|
19
|
+
commit-message:
|
|
20
|
+
prefix: deps
|
|
21
|
+
groups:
|
|
22
|
+
python-deps:
|
|
23
|
+
patterns: ["*"]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
## What & why
|
|
2
|
+
|
|
3
|
+
Briefly describe the change and the motivation.
|
|
4
|
+
|
|
5
|
+
## Checklist
|
|
6
|
+
|
|
7
|
+
- [ ] `ruff check .` and `ruff format --check .` pass
|
|
8
|
+
- [ ] `mypy` passes (strict)
|
|
9
|
+
- [ ] `pytest` passes; new/changed behavior has a test
|
|
10
|
+
- [ ] Updated `CHANGELOG.md` under **Unreleased** if user-facing
|
|
11
|
+
- [ ] Docs/README updated if needed
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
lint:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: actions/setup-python@v5
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.12"
|
|
16
|
+
- run: pip install -e ".[dev]"
|
|
17
|
+
- name: Ruff lint
|
|
18
|
+
run: ruff check .
|
|
19
|
+
- name: Ruff format check
|
|
20
|
+
run: ruff format --check .
|
|
21
|
+
- name: Mypy (strict)
|
|
22
|
+
run: mypy
|
|
23
|
+
|
|
24
|
+
test:
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
strategy:
|
|
27
|
+
matrix:
|
|
28
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
- uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: ${{ matrix.python-version }}
|
|
34
|
+
- run: pip install -e ".[dev]"
|
|
35
|
+
- name: Unit tests
|
|
36
|
+
run: pytest --cov=brooder --cov-report=term-missing
|
|
37
|
+
- name: Smoke test the CLI (record then re-run the stable agent)
|
|
38
|
+
run: |
|
|
39
|
+
brooder record examples/stable_agent.py
|
|
40
|
+
brooder run --model gpt-5-new examples/stable_agent.py
|
|
41
|
+
|
|
42
|
+
docs:
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
steps:
|
|
45
|
+
- uses: actions/checkout@v4
|
|
46
|
+
- uses: actions/setup-python@v5
|
|
47
|
+
with:
|
|
48
|
+
python-version: "3.12"
|
|
49
|
+
- run: pip install -e ".[docs]"
|
|
50
|
+
- name: Build docs
|
|
51
|
+
run: mkdocs build
|
|
52
|
+
|
|
53
|
+
licenses:
|
|
54
|
+
# Keep the Apache-2.0 core free of strong copyleft (GPL/AGPL/SSPL). LGPL is allowed.
|
|
55
|
+
runs-on: ubuntu-latest
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
- uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: "3.12"
|
|
61
|
+
- name: Install runtime deps only
|
|
62
|
+
run: python -m pip install .
|
|
63
|
+
- name: Check dependency licenses
|
|
64
|
+
run: |
|
|
65
|
+
python -m pip install pip-licenses
|
|
66
|
+
echo "Dependency licenses:"
|
|
67
|
+
pip-licenses --format=markdown
|
|
68
|
+
copyleft=$(pip-licenses --format=csv \
|
|
69
|
+
| grep -Eiv 'lgpl|lesser general public' \
|
|
70
|
+
| grep -Ei 'gpl|affero|sspl|server side public' || true)
|
|
71
|
+
if [ -n "$copyleft" ]; then
|
|
72
|
+
echo "::error::strong copyleft dependency detected (Apache-2.0 core must stay clean):"
|
|
73
|
+
echo "$copyleft"
|
|
74
|
+
exit 1
|
|
75
|
+
fi
|
|
76
|
+
echo "OK: no strong copyleft in dependencies."
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Publish to PyPI on a version tag, via PyPI Trusted Publishing (OIDC) — no API tokens.
|
|
2
|
+
#
|
|
3
|
+
# Requires a PyPI Trusted Publisher configured for this repo with:
|
|
4
|
+
# workflow = release.yml environment = pypi
|
|
5
|
+
# (Set it at https://pypi.org/manage/project/brooder/settings/publishing/ once the project exists,
|
|
6
|
+
# or as a pending publisher before the first release.)
|
|
7
|
+
#
|
|
8
|
+
# Release flow:
|
|
9
|
+
# 1. bump `version` in pyproject.toml
|
|
10
|
+
# 2. git tag -a vX.Y.Z -m "brooder X.Y.Z" && git push origin vX.Y.Z
|
|
11
|
+
# 3. (major tag for the Action) git tag -f vX && git push -f origin vX
|
|
12
|
+
name: release
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
tags:
|
|
17
|
+
- "v*.*.*" # semantic version tags only (the moving `v1` Action tag must not trigger a publish)
|
|
18
|
+
|
|
19
|
+
permissions:
|
|
20
|
+
contents: read
|
|
21
|
+
|
|
22
|
+
jobs:
|
|
23
|
+
build:
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
- uses: actions/setup-python@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: "3.12"
|
|
30
|
+
- name: Build sdist + wheel
|
|
31
|
+
run: |
|
|
32
|
+
python -m pip install --upgrade build
|
|
33
|
+
python -m build
|
|
34
|
+
- name: Validate metadata
|
|
35
|
+
run: |
|
|
36
|
+
python -m pip install --upgrade twine
|
|
37
|
+
python -m twine check dist/*
|
|
38
|
+
- uses: actions/upload-artifact@v4
|
|
39
|
+
with:
|
|
40
|
+
name: dist
|
|
41
|
+
path: dist/
|
|
42
|
+
|
|
43
|
+
publish:
|
|
44
|
+
needs: build
|
|
45
|
+
runs-on: ubuntu-latest
|
|
46
|
+
environment: pypi # must match the PyPI Trusted Publisher's environment
|
|
47
|
+
permissions:
|
|
48
|
+
id-token: write # OIDC token for Trusted Publishing
|
|
49
|
+
steps:
|
|
50
|
+
- uses: actions/download-artifact@v4
|
|
51
|
+
with:
|
|
52
|
+
name: dist
|
|
53
|
+
path: dist/
|
|
54
|
+
- name: Publish to PyPI
|
|
55
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
brooder-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# ── Brooder local artifacts ────────────────────────────────────────────────
|
|
2
|
+
# Baselines under .brooder/baselines/ ARE committed (they're your golden records).
|
|
3
|
+
# Everything else Brooder writes locally is not.
|
|
4
|
+
.brooder/runs/
|
|
5
|
+
.brooder/results/
|
|
6
|
+
.brooder/cache/
|
|
7
|
+
|
|
8
|
+
# ── Python ──────────────────────────────────────────────────────────────────
|
|
9
|
+
__pycache__/
|
|
10
|
+
*.py[cod]
|
|
11
|
+
*$py.class
|
|
12
|
+
*.so
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# ── Virtual environments ────────────────────────────────────────────────────
|
|
33
|
+
.venv/
|
|
34
|
+
venv/
|
|
35
|
+
env/
|
|
36
|
+
ENV/
|
|
37
|
+
.python-version
|
|
38
|
+
|
|
39
|
+
# ── Environment / secrets ───────────────────────────────────────────────────
|
|
40
|
+
.env
|
|
41
|
+
.env.*
|
|
42
|
+
!.env.example
|
|
43
|
+
*.pem
|
|
44
|
+
*.key
|
|
45
|
+
|
|
46
|
+
# ── Testing / typing / linting caches ───────────────────────────────────────
|
|
47
|
+
.pytest_cache/
|
|
48
|
+
.mypy_cache/
|
|
49
|
+
.ruff_cache/
|
|
50
|
+
.dmypy.json
|
|
51
|
+
.tox/
|
|
52
|
+
.nox/
|
|
53
|
+
.cache/
|
|
54
|
+
htmlcov/
|
|
55
|
+
.coverage
|
|
56
|
+
.coverage.*
|
|
57
|
+
coverage.xml
|
|
58
|
+
*.cover
|
|
59
|
+
|
|
60
|
+
# ── Build backends / packaging ──────────────────────────────────────────────
|
|
61
|
+
pip-wheel-metadata/
|
|
62
|
+
.pdm-python
|
|
63
|
+
.pdm-build/
|
|
64
|
+
|
|
65
|
+
# ── Docs (MkDocs output) ────────────────────────────────────────────────────
|
|
66
|
+
site/
|
|
67
|
+
|
|
68
|
+
# ── Node / TypeScript SDK (future) ──────────────────────────────────────────
|
|
69
|
+
node_modules/
|
|
70
|
+
npm-debug.log*
|
|
71
|
+
yarn-error.log*
|
|
72
|
+
*.tsbuildinfo
|
|
73
|
+
|
|
74
|
+
# ── OS ──────────────────────────────────────────────────────────────────────
|
|
75
|
+
.DS_Store
|
|
76
|
+
Thumbs.db
|
|
77
|
+
desktop.ini
|
|
78
|
+
|
|
79
|
+
# ── Editors / IDEs ──────────────────────────────────────────────────────────
|
|
80
|
+
.idea/
|
|
81
|
+
.vscode/
|
|
82
|
+
*.swp
|
|
83
|
+
*.swo
|
|
84
|
+
*~
|
|
85
|
+
.claude/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# See https://pre-commit.com. Install with: pre-commit install
|
|
2
|
+
repos:
|
|
3
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
4
|
+
rev: v4.6.0
|
|
5
|
+
hooks:
|
|
6
|
+
- id: trailing-whitespace
|
|
7
|
+
- id: end-of-file-fixer
|
|
8
|
+
- id: check-yaml
|
|
9
|
+
- id: check-toml
|
|
10
|
+
- id: check-added-large-files
|
|
11
|
+
|
|
12
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
13
|
+
rev: v0.6.9
|
|
14
|
+
hooks:
|
|
15
|
+
- id: ruff
|
|
16
|
+
args: [--fix]
|
|
17
|
+
- id: ruff-format
|
|
18
|
+
|
|
19
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
20
|
+
rev: v1.11.2
|
|
21
|
+
hooks:
|
|
22
|
+
- id: mypy
|
|
23
|
+
files: ^src/
|
|
24
|
+
additional_dependencies:
|
|
25
|
+
- pydantic>=2.5
|
|
26
|
+
- types-PyYAML
|
|
27
|
+
- rich
|
|
28
|
+
- typer
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here. The format is based on
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to
|
|
5
|
+
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [0.1.0] — 2026-07-02
|
|
10
|
+
|
|
11
|
+
First public release.
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
- Python SDK: `@brooder.record` decorator and `brooder.tool_call(...)` capture.
|
|
15
|
+
- CLI: `init`, `record`, `run`, `diff`, `approve`, `ci`, `migrate`, plus the `--version` and `-v` (debug logging) flags.
|
|
16
|
+
- Structural behavioral diff engine (tool-call sequence, args, final output) + stability score.
|
|
17
|
+
- **Model Migration Report** (`brooder migrate --from X --to Y`).
|
|
18
|
+
- **Flakiness detection** (`brooder run --runs N`) — runs each case N times, verdict `FLAKY`.
|
|
19
|
+
- **Pluggable output judge** (`judge: exact | llm`) so equivalent wording isn't flagged as a regression.
|
|
20
|
+
- **Provider auto-capture** (`brooder.instrument(client)`) — records the model's tool calls
|
|
21
|
+
automatically for OpenAI, Azure OpenAI, Anthropic, AWS Bedrock, and Google (Gemini/Vertex).
|
|
22
|
+
- **First-class trajectory (phase 1)** — a `Run` captures an ordered `Step` trajectory
|
|
23
|
+
(TURN / TOOL / FINAL), exposed via `run.turns` and `run.step_count`. New `brooder.turn()` marks
|
|
24
|
+
a model turn; provider auto-capture emits turns and tool steps.
|
|
25
|
+
- **Trajectory-aware diff (phase 2)** — the diff engine LCS-aligns the baseline and current
|
|
26
|
+
trajectories, so an added, dropped, or reordered step is reported at the exact position where the
|
|
27
|
+
path diverged (`trajectory[N]`) instead of cascading. Turn- and step-counts surface as
|
|
28
|
+
`trajectory.turns` / `trajectory.steps` signals, the report leads with a
|
|
29
|
+
*"path diverged at step N"* headline, and the summary table gains a Trajectory column (so the
|
|
30
|
+
Model Migration Report shows loop/turn regressions at a glance).
|
|
31
|
+
- **Trajectory guardrails & observations (phase 3)** — `trajectory.max_steps` aborts a run that
|
|
32
|
+
exceeds the cap and records it as a `trajectory.runaway` regression, so infinite loops fail fast
|
|
33
|
+
instead of hanging. An agent that returns without a final answer is captured as a diffable
|
|
34
|
+
`trajectory.gave_up` terminal. Tool observations (results) can be diffed through the judge by
|
|
35
|
+
opting in with `trajectory.observations: true` — off by default because tool output is noisy.
|
|
36
|
+
- **Adapter capture core (P4a)** — a `RunHandle` + external-id registry (`brooder.recorder.open_run`
|
|
37
|
+
/ `get_run`) that lets framework adapters open a run driven by the framework's own events (a trace
|
|
38
|
+
or session), append steps as they arrive — possibly out of order and from other threads — and
|
|
39
|
+
finish it, all feeding the same `Step` trajectory. `@record` and provider auto-capture now run on
|
|
40
|
+
this one lifecycle. Adapter-owned runs *flag* a `max_steps` runaway instead of raising (they can't
|
|
41
|
+
abort the framework's loop). Foundation for the native framework adapters
|
|
42
|
+
(see [design/framework-adapters.md](design/framework-adapters.md)).
|
|
43
|
+
- **OpenTelemetry GenAI ingestion (P4b)** — `brooder.integrations.otel.BrooderSpanProcessor`, a
|
|
44
|
+
duck-typed OTel `SpanProcessor` that turns the GenAI/agent spans your stack already emits into a
|
|
45
|
+
trajectory (inference → TURN, `execute_tool` → TOOL, the agent-root's input/output → case identity
|
|
46
|
+
and FINAL). One adapter covers **LangGraph / CrewAI / AutoGen** and slots into existing OTel
|
|
47
|
+
pipelines. Spans are buffered per trace and sorted by start time at finalize, so out-of-order /
|
|
48
|
+
cross-thread delivery is handled; incomplete traces drain on `force_flush` / `shutdown`. Tool
|
|
49
|
+
*names* are always captured; when content capture is off, the processor degrades gracefully and warns once
|
|
50
|
+
(set a `brooder.case_id` span attribute for a stable case id). Importing the module does not
|
|
51
|
+
require `opentelemetry` to be installed.
|
|
52
|
+
- **Claude Agent SDK capture (P4c)** — `brooder.claude_agent_hooks(agent=...)` returns a hooks
|
|
53
|
+
mapping for `ClaudeAgentOptions(hooks=...)` that records the agent's tool trajectory with no manual
|
|
54
|
+
`tool_call`. `UserPromptSubmit` opens a run (the prompt is the case identity), `PostToolUse` /
|
|
55
|
+
`PostToolUseFailure` become TOOL steps, and `Stop` finalizes. Each user turn in a session is one
|
|
56
|
+
case. Since the Python SDK's `Stop` hook carries no final text, capture the answer with the
|
|
57
|
+
optional `brooder.integrations.claude_agent.record_output(session_id, result)` (fed from the
|
|
58
|
+
`ResultMessage` stream); the trajectory — the core diff signal — is captured regardless.
|
|
59
|
+
- **OpenAI Agents SDK capture (P4d)** — `brooder.integrations.openai_agents.install(agent=...)`
|
|
60
|
+
registers a trace processor (appended, so the SDK's default exporter keeps working; no OpenAI API
|
|
61
|
+
key needed to capture) that maps the SDK's spans into a trajectory: `generation`/`response` → TURN,
|
|
62
|
+
`function` → TOOL, `handoff` → a `handoff` TOOL step (`{from, to}`), `guardrail` → a `guardrail`
|
|
63
|
+
TOOL step (`{name, triggered}`). Spans are buffered per trace and sorted by `started_at` at
|
|
64
|
+
`on_trace_end` (handling parallel tool calls / concurrent agents); the initial input and final
|
|
65
|
+
answer are recovered from the response spans. Importing the module does not require `openai-agents`.
|
|
66
|
+
- **LangChain / LangGraph capture (P4e)** — `brooder.integrations.langchain.callback_handler(agent=...)`
|
|
67
|
+
returns a `BaseCallbackHandler` for `config={"callbacks": [handler]}` that records the trajectory
|
|
68
|
+
with no manual `tool_call` and no OpenTelemetry wiring (covers LangGraph, which runs on LangChain's
|
|
69
|
+
callbacks). The root `on_chain_start` opens a run (its input is the case identity), model starts →
|
|
70
|
+
TURN steps, `on_tool_start`+`on_tool_end` → TOOL steps (paired by the tool's `run_id`, with
|
|
71
|
+
`on_tool_error` captured too), and the root `on_chain_end` finalizes. Nested calls are mapped to
|
|
72
|
+
their root run; capture is thread-safe (sync handlers can run on worker threads during async runs).
|
|
73
|
+
The capture logic imports without `langchain-core`; only building the handler needs it.
|
|
74
|
+
- **Async auto-capture** — `@brooder.record` and `brooder.instrument(...)` now work on
|
|
75
|
+
`async def` agents and async clients (`AsyncOpenAI`, `AsyncAzureOpenAI`, `AsyncAnthropic`, and
|
|
76
|
+
Google's `generate_content_async`). The active-run context stays bound across the agent's `await`
|
|
77
|
+
points (and into child tasks), so tool calls are captured just like the sync path. (Async Bedrock
|
|
78
|
+
via aioboto3 is not covered yet.)
|
|
79
|
+
- **Machine-readable output** — `brooder run` / `ci` / `diff` accept `--format table|json|markdown`
|
|
80
|
+
(with `--json` as a shortcut). JSON emits a stable, versioned summary (per-case verdict, stability,
|
|
81
|
+
changes, and aggregate counts) for CI to parse; Markdown renders a PR-comment-ready table + diff.
|
|
82
|
+
Exit codes are unchanged.
|
|
83
|
+
- **OTLP metric emission** — set `metrics.otlp_endpoint` (or `OTEL_EXPORTER_OTLP_ENDPOINT`) and each
|
|
84
|
+
run emits a snapshot of gauges (`brooder.cases.*`, `brooder.stability.mean`) over a single OTLP
|
|
85
|
+
exporter — one emitter reaching Datadog / Grafana / Honeycomb / CloudWatch. Duck-typed and
|
|
86
|
+
best-effort: it warns once and no-ops if the `otel` extra isn't installed, and never fails a run.
|
|
87
|
+
- **GitHub Action** — a composite action (`action.yml`, usage in
|
|
88
|
+
[examples/github-action.yml](examples/github-action.yml)) that runs `brooder ci` on pull requests,
|
|
89
|
+
upserts a comment with the behavioral diff, and fails the check on regression.
|
|
90
|
+
- Offline example agents (`stable_agent`, `regressing_agent`, `flaky_agent`, multi-step
|
|
91
|
+
`loop_agent`) — no API keys required.
|
|
92
|
+
- Strict typing (`py.typed`), atomic storage writes, typed config, structured logging.
|
|
93
|
+
- Tooling: ruff, mypy (strict), pre-commit, pytest + coverage, CI matrix (3.10–3.12).
|
|
94
|
+
|
|
95
|
+
[Unreleased]: https://github.com/agentbrooder/brooder/compare/v0.1.0...HEAD
|
|
96
|
+
[0.1.0]: https://github.com/agentbrooder/brooder/releases/tag/v0.1.0
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Contributing to Brooder
|
|
2
|
+
|
|
3
|
+
Thanks for helping build the safety net for AI agents. This project is being built in public and
|
|
4
|
+
early — issues, ideas, and PRs are all welcome.
|
|
5
|
+
|
|
6
|
+
## Dev setup
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
git clone https://github.com/agentbrooder/brooder && cd brooder
|
|
10
|
+
python -m venv .venv && source .venv/bin/activate
|
|
11
|
+
pip install -e ".[dev]"
|
|
12
|
+
pytest
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Try the demo
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
brooder migrate --from gpt-4o --to gpt-5-new examples/regressing_agent.py
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Where things live
|
|
22
|
+
|
|
23
|
+
- `src/brooder/recorder.py` — the `@record` decorator + `tool_call` capture.
|
|
24
|
+
- `src/brooder/diffing.py` — the behavioral diff engine (the core).
|
|
25
|
+
- `src/brooder/cli.py` — the `brooder` commands.
|
|
26
|
+
- `examples/` — runnable demo agents (offline, no API keys).
|
|
27
|
+
|
|
28
|
+
## Good first issues
|
|
29
|
+
|
|
30
|
+
- Add an OpenAI or Anthropic provider wrapper so tool calls are captured automatically.
|
|
31
|
+
- Add a semantic (LLM-judge) diff mode behind `judge: llm` in config.
|
|
32
|
+
- Add flakiness scoring (run each case N times, report variance).
|
|
33
|
+
|
|
34
|
+
## Docs
|
|
35
|
+
|
|
36
|
+
The API docs are generated from docstrings with MkDocs + mkdocstrings.
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install -e ".[docs]"
|
|
40
|
+
mkdocs serve # live preview at http://127.0.0.1:8000
|
|
41
|
+
mkdocs build # what CI runs
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Public modules, classes, and functions use **Google-style docstrings** (Args/Returns/Raises),
|
|
45
|
+
enforced by ruff's `D` rules. New public API without a docstring will fail lint.
|
|
46
|
+
|
|
47
|
+
## Signing your commits (DCO)
|
|
48
|
+
|
|
49
|
+
Contributions are accepted under the [Developer Certificate of Origin](DCO). Certify that you wrote
|
|
50
|
+
(or have the right to submit) your change by signing off each commit:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
git commit -s -m "your message"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
This appends a `Signed-off-by: Your Name <you@example.com>` line. No paperwork, no account — just the
|
|
57
|
+
sign-off. See [LICENSING.md](LICENSING.md) for the full licensing picture.
|
|
58
|
+
|
|
59
|
+
## Guidelines
|
|
60
|
+
|
|
61
|
+
- Keep the core engine model-agnostic and runnable offline (the example agents must never require API keys).
|
|
62
|
+
- Prefer small, reviewable PRs. Add a test in `tests/` for engine changes.
|
|
63
|
+
- Public API gets a Google-style docstring (ruff `D` enforces it; docs generate from them).
|
|
64
|
+
- Be kind. We're all figuring agents out together.
|
brooder-0.1.0/DCO
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Developer Certificate of Origin
|
|
2
|
+
Version 1.1
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
|
|
5
|
+
|
|
6
|
+
Everyone is permitted to copy and distribute verbatim copies of this
|
|
7
|
+
license document, but changing it is not allowed.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
Developer's Certificate of Origin 1.1
|
|
11
|
+
|
|
12
|
+
By making a contribution to this project, I certify that:
|
|
13
|
+
|
|
14
|
+
(a) The contribution was created in whole or in part by me and I
|
|
15
|
+
have the right to submit it under the open source license
|
|
16
|
+
indicated in the file; or
|
|
17
|
+
|
|
18
|
+
(b) The contribution is based upon previous work that, to the best
|
|
19
|
+
of my knowledge, is covered under an appropriate open source
|
|
20
|
+
license and I have the right under that license to submit that
|
|
21
|
+
work with modifications, whether created in whole or in part
|
|
22
|
+
by me, under the same open source license (unless I am
|
|
23
|
+
permitted to submit under a different license), as indicated
|
|
24
|
+
in the file; or
|
|
25
|
+
|
|
26
|
+
(c) The contribution was provided directly to me by some other
|
|
27
|
+
person who certified (a), (b) or (c) and I have not modified
|
|
28
|
+
it.
|
|
29
|
+
|
|
30
|
+
(d) I understand and agree that this project and the contribution
|
|
31
|
+
are public and that a record of the contribution (including all
|
|
32
|
+
personal information I submit with it, including my sign-off) is
|
|
33
|
+
maintained indefinitely and may be redistributed consistent with
|
|
34
|
+
this project or the open source license(s) involved.
|