agentix-toolkit 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentix_toolkit-0.2.0/.editorconfig +18 -0
- agentix_toolkit-0.2.0/.github/ISSUE_TEMPLATE/bug_report.yml +43 -0
- agentix_toolkit-0.2.0/.github/ISSUE_TEMPLATE/config.yml +8 -0
- agentix_toolkit-0.2.0/.github/ISSUE_TEMPLATE/feature_request.yml +32 -0
- agentix_toolkit-0.2.0/.github/PULL_REQUEST_TEMPLATE.md +21 -0
- agentix_toolkit-0.2.0/.github/dependabot.yml +20 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.github/workflows/ci.yml +4 -4
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.github/workflows/release.yml +4 -4
- agentix_toolkit-0.2.0/.pre-commit-config.yaml +27 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/CHANGELOG.md +34 -1
- agentix_toolkit-0.2.0/CODE_OF_CONDUCT.md +64 -0
- agentix_toolkit-0.2.0/CONTRIBUTING.md +67 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/PKG-INFO +57 -9
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/PLAN.md +20 -2
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/README.md +54 -8
- agentix_toolkit-0.2.0/SECURITY.md +46 -0
- agentix_toolkit-0.2.0/examples/15_permissions.py +91 -0
- agentix_toolkit-0.2.0/examples/16_reliability.py +72 -0
- agentix_toolkit-0.2.0/examples/17_eval.py +57 -0
- agentix_toolkit-0.2.0/examples/18_verification.py +72 -0
- agentix_toolkit-0.2.0/examples/19_tracing.py +77 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/README.md +5 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/pyproject.toml +7 -1
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/__init__.py +44 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/agent.py +58 -0
- agentix_toolkit-0.2.0/src/agentix/consistency.py +76 -0
- agentix_toolkit-0.2.0/src/agentix/evals.py +262 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/__init__.py +12 -0
- agentix_toolkit-0.2.0/src/agentix/guards/judge.py +53 -0
- agentix_toolkit-0.2.0/src/agentix/guards/permissions.py +81 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/providers/anthropic.py +13 -8
- agentix_toolkit-0.2.0/src/agentix/resilience.py +95 -0
- agentix_toolkit-0.2.0/src/agentix/tracing.py +122 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/types.py +1 -0
- agentix_toolkit-0.2.0/src/agentix/validation.py +57 -0
- agentix_toolkit-0.2.0/tests/test_consistency.py +78 -0
- agentix_toolkit-0.2.0/tests/test_evals.py +168 -0
- agentix_toolkit-0.2.0/tests/test_judge.py +61 -0
- agentix_toolkit-0.2.0/tests/test_permissions.py +187 -0
- agentix_toolkit-0.2.0/tests/test_resilience.py +105 -0
- agentix_toolkit-0.2.0/tests/test_tracing.py +195 -0
- agentix_toolkit-0.2.0/tests/test_validation.py +149 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/uv.lock +19 -3
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.gitignore +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.python-version +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/LICENSE +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/RELEASING.md +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/01_hello_agent.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/02_tool_use.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/03_async_dynamic_loop.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/04_policy_and_trust.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/05_anthropic_model.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/06_tool_decorator.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/07_guards.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/08_persistence.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/09_streaming.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/10_concurrency.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/11_mcp.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/12_context.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/13_subagents.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/14_cost_and_interrupt.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/concurrency.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/confirm.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/context.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/control.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/errors.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/events.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/executors.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/base.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/injection.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/pii.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/tiers.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/trust.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/mcp.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/model.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/policy.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/pricing.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/providers/__init__.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/providers/mock.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/py.typed +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/serde.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/store.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/streaming.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/subagents.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/tools.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_agent.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_anthropic_adapter.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_concurrency.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_context.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_cost_and_interrupt.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_guards.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_mcp.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_persistence.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_streaming.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_subagents.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_tools.py +0 -0
- {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_types.py +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
root = true
|
|
2
|
+
|
|
3
|
+
[*]
|
|
4
|
+
charset = utf-8
|
|
5
|
+
end_of_line = lf
|
|
6
|
+
insert_final_newline = true
|
|
7
|
+
trim_trailing_whitespace = true
|
|
8
|
+
indent_style = space
|
|
9
|
+
|
|
10
|
+
[*.py]
|
|
11
|
+
indent_size = 4
|
|
12
|
+
max_line_length = 100
|
|
13
|
+
|
|
14
|
+
[*.{yml,yaml,toml,json}]
|
|
15
|
+
indent_size = 2
|
|
16
|
+
|
|
17
|
+
[*.md]
|
|
18
|
+
trim_trailing_whitespace = false
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: Bug report
|
|
2
|
+
description: Something isn't working as documented.
|
|
3
|
+
labels: ["bug"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Thanks for the report! For **security vulnerabilities**, do not file a
|
|
9
|
+
public issue — see [SECURITY.md](../blob/main/SECURITY.md).
|
|
10
|
+
- type: textarea
|
|
11
|
+
id: what-happened
|
|
12
|
+
attributes:
|
|
13
|
+
label: What happened?
|
|
14
|
+
description: What did you expect, and what happened instead?
|
|
15
|
+
validations:
|
|
16
|
+
required: true
|
|
17
|
+
- type: textarea
|
|
18
|
+
id: repro
|
|
19
|
+
attributes:
|
|
20
|
+
label: Minimal reproduction
|
|
21
|
+
description: The smallest code snippet that reproduces it. Prefer `MockModel` so it runs without an API key.
|
|
22
|
+
render: python
|
|
23
|
+
validations:
|
|
24
|
+
required: true
|
|
25
|
+
- type: input
|
|
26
|
+
id: version
|
|
27
|
+
attributes:
|
|
28
|
+
label: agentix-toolkit version
|
|
29
|
+
placeholder: "0.1.0"
|
|
30
|
+
validations:
|
|
31
|
+
required: true
|
|
32
|
+
- type: input
|
|
33
|
+
id: python
|
|
34
|
+
attributes:
|
|
35
|
+
label: Python version
|
|
36
|
+
placeholder: "3.12"
|
|
37
|
+
validations:
|
|
38
|
+
required: true
|
|
39
|
+
- type: textarea
|
|
40
|
+
id: extra
|
|
41
|
+
attributes:
|
|
42
|
+
label: Anything else?
|
|
43
|
+
description: Traceback, environment, relevant config (model/provider, guards, etc.).
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
blank_issues_enabled: false
|
|
2
|
+
contact_links:
|
|
3
|
+
- name: Security vulnerability
|
|
4
|
+
url: https://github.com/skwijeratne/agentix-toolkit/security/advisories/new
|
|
5
|
+
about: Report security issues privately — please do not open a public issue.
|
|
6
|
+
- name: Question / usage help
|
|
7
|
+
url: https://github.com/skwijeratne/agentix-toolkit/discussions
|
|
8
|
+
about: Ask questions and discuss ideas (if Discussions is enabled).
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: Feature request
|
|
2
|
+
description: Suggest a capability or improvement.
|
|
3
|
+
labels: ["enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: problem
|
|
7
|
+
attributes:
|
|
8
|
+
label: What problem does this solve?
|
|
9
|
+
description: The use case or pain point. What are you trying to build?
|
|
10
|
+
validations:
|
|
11
|
+
required: true
|
|
12
|
+
- type: textarea
|
|
13
|
+
id: proposal
|
|
14
|
+
attributes:
|
|
15
|
+
label: Proposed solution
|
|
16
|
+
description: |
|
|
17
|
+
What would the API look like? Remember agentix's design: small shared
|
|
18
|
+
core, with capabilities injected (a guard / tool / strategy / adapter)
|
|
19
|
+
rather than baked into the loop.
|
|
20
|
+
validations:
|
|
21
|
+
required: true
|
|
22
|
+
- type: textarea
|
|
23
|
+
id: alternatives
|
|
24
|
+
attributes:
|
|
25
|
+
label: Alternatives considered
|
|
26
|
+
- type: checkboxes
|
|
27
|
+
id: scope
|
|
28
|
+
attributes:
|
|
29
|
+
label: Fit
|
|
30
|
+
options:
|
|
31
|
+
- label: This keeps the core provider-agnostic (no coupling to one model vendor).
|
|
32
|
+
- label: I'm willing to help implement it.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
<!-- Thanks for contributing! Keep PRs focused. -->
|
|
2
|
+
|
|
3
|
+
## What & why
|
|
4
|
+
|
|
5
|
+
<!-- What does this change, and what problem does it solve? Link the issue. -->
|
|
6
|
+
|
|
7
|
+
Closes #
|
|
8
|
+
|
|
9
|
+
## Checklist
|
|
10
|
+
|
|
11
|
+
- [ ] Tests added/updated (new behavior covered; bug fixes have a regression test)
|
|
12
|
+
- [ ] `uv run pytest` passes
|
|
13
|
+
- [ ] `uv run ruff check src tests` passes
|
|
14
|
+
- [ ] `uv run mypy` passes
|
|
15
|
+
- [ ] Docs updated where relevant (docstrings / README / an `examples/` script)
|
|
16
|
+
- [ ] `CHANGELOG.md` updated under `[Unreleased]`
|
|
17
|
+
- [ ] Change is opt-in / composable and keeps the core provider-agnostic
|
|
18
|
+
|
|
19
|
+
## Notes for reviewers
|
|
20
|
+
|
|
21
|
+
<!-- Anything that needs context: design tradeoffs, follow-ups, etc. -->
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
# Python dependencies (resolved via uv.lock).
|
|
4
|
+
- package-ecosystem: "uv"
|
|
5
|
+
directory: "/"
|
|
6
|
+
schedule:
|
|
7
|
+
interval: "weekly"
|
|
8
|
+
open-pull-requests-limit: 5
|
|
9
|
+
groups:
|
|
10
|
+
python-deps:
|
|
11
|
+
patterns: ["*"]
|
|
12
|
+
|
|
13
|
+
# Keep GitHub Actions current (also resolves the Node-version action drift).
|
|
14
|
+
- package-ecosystem: "github-actions"
|
|
15
|
+
directory: "/"
|
|
16
|
+
schedule:
|
|
17
|
+
interval: "weekly"
|
|
18
|
+
groups:
|
|
19
|
+
actions:
|
|
20
|
+
patterns: ["*"]
|
|
@@ -14,9 +14,9 @@ jobs:
|
|
|
14
14
|
matrix:
|
|
15
15
|
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
16
16
|
steps:
|
|
17
|
-
- uses: actions/checkout@
|
|
17
|
+
- uses: actions/checkout@v7
|
|
18
18
|
- name: Install uv
|
|
19
|
-
uses: astral-sh/setup-uv@
|
|
19
|
+
uses: astral-sh/setup-uv@v7
|
|
20
20
|
with:
|
|
21
21
|
enable-cache: true
|
|
22
22
|
- run: uv python install ${{ matrix.python-version }}
|
|
@@ -27,9 +27,9 @@ jobs:
|
|
|
27
27
|
name: lint & types
|
|
28
28
|
runs-on: ubuntu-latest
|
|
29
29
|
steps:
|
|
30
|
-
- uses: actions/checkout@
|
|
30
|
+
- uses: actions/checkout@v7
|
|
31
31
|
- name: Install uv
|
|
32
|
-
uses: astral-sh/setup-uv@
|
|
32
|
+
uses: astral-sh/setup-uv@v7
|
|
33
33
|
with:
|
|
34
34
|
enable-cache: true
|
|
35
35
|
# --all-extras so mypy can resolve the optional anthropic/mcp imports.
|
|
@@ -12,13 +12,13 @@ jobs:
|
|
|
12
12
|
build:
|
|
13
13
|
runs-on: ubuntu-latest
|
|
14
14
|
steps:
|
|
15
|
-
- uses: actions/checkout@
|
|
15
|
+
- uses: actions/checkout@v7
|
|
16
16
|
- name: Install uv
|
|
17
|
-
uses: astral-sh/setup-uv@
|
|
17
|
+
uses: astral-sh/setup-uv@v7
|
|
18
18
|
- run: uv build
|
|
19
19
|
- name: Check the built artifacts
|
|
20
20
|
run: uvx twine check dist/*
|
|
21
|
-
- uses: actions/upload-artifact@
|
|
21
|
+
- uses: actions/upload-artifact@v7
|
|
22
22
|
with:
|
|
23
23
|
name: dist
|
|
24
24
|
path: dist/
|
|
@@ -30,7 +30,7 @@ jobs:
|
|
|
30
30
|
permissions:
|
|
31
31
|
id-token: write # required for Trusted Publishing
|
|
32
32
|
steps:
|
|
33
|
-
- uses: actions/download-artifact@
|
|
33
|
+
- uses: actions/download-artifact@v8
|
|
34
34
|
with:
|
|
35
35
|
name: dist
|
|
36
36
|
path: dist/
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Run `uv run pre-commit install` once to enable. Mirrors the CI lint and type-check gates (tests are not run here).
|
|
2
|
+
repos:
|
|
3
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
4
|
+
rev: v5.0.0
|
|
5
|
+
hooks:
|
|
6
|
+
- id: trailing-whitespace
|
|
7
|
+
- id: end-of-file-fixer
|
|
8
|
+
- id: check-yaml
|
|
9
|
+
- id: check-toml
|
|
10
|
+
- id: check-merge-conflict
|
|
11
|
+
- id: check-added-large-files
|
|
12
|
+
|
|
13
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
14
|
+
rev: v0.15.18
|
|
15
|
+
hooks:
|
|
16
|
+
- id: ruff
|
|
17
|
+
args: [--fix]
|
|
18
|
+
|
|
19
|
+
# mypy --strict, matching CI. Uses the project's own env via uv.
|
|
20
|
+
- repo: local
|
|
21
|
+
hooks:
|
|
22
|
+
- id: mypy
|
|
23
|
+
name: mypy (strict)
|
|
24
|
+
entry: uv run mypy
|
|
25
|
+
language: system
|
|
26
|
+
types: [python]
|
|
27
|
+
pass_filenames: false
|
|
@@ -6,6 +6,38 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.2.0] - 2026-06-23
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- Subagents: `subagent_tool(agent, ...)` exposes a child agent as a delegable
|
|
13
|
+
tool (its own model/system prompt/tools/guards); composes with the loop and
|
|
14
|
+
`bounded_gather`.
|
|
15
|
+
- Cost & control: USD cost tracking (`pricing` module, `cost_usd`, and
|
|
16
|
+
`cost_usd` attributes on `ModelResponse`/`AgentOutcome`; the Anthropic adapter fills
|
|
17
|
+
`input_tokens`/`output_tokens`/`cost_usd`); `AgentPolicy.max_budget_usd`; and
|
|
18
|
+
`Interrupt` to stop a run/stream at a safe boundary.
|
|
19
|
+
- Dynamic permissions: `CallbackGuard` (a `can_use_tool`-style per-call callback
|
|
20
|
+
returning allow/deny/confirm) and `ToolAllowlistGuard` (scope a run to a
|
|
21
|
+
subset of tools).
|
|
22
|
+
- Output validation + retry: `Agent(output_validator=, max_output_retries=)`
|
|
23
|
+
re-prompts on a failed validation and exposes `AgentOutcome.parsed`. Ships
|
|
24
|
+
`json_output`, `pydantic_output`, `regex_output`.
|
|
25
|
+
- Resilient model wrappers: `RetryModel` (backoff) and `FallbackModel`
|
|
26
|
+
(try-next-on-error), composable and drop-in.
|
|
27
|
+
- Eval harness (`agentix.evals`): `evaluate(...)` runs an agent over `Case`s and
|
|
28
|
+
returns an `EvalReport` with `pass_rate` / `format_success_rate` /
|
|
29
|
+
`assert_pass_rate()` (gate CI on regressions). Scorers: `exact_match`,
|
|
30
|
+
`contains`, `regex_match`, `predicate`, `llm_judge`.
|
|
31
|
+
- `SelfConsistencyModel`: sample a model N times per turn and return the majority
|
|
32
|
+
vote (drop-in `ModelFn`).
|
|
33
|
+
- `JudgeGuard`: an LLM reviews the final answer against a rubric and replaces it
|
|
34
|
+
on failure (an `on_answer` safety/on-brand/format gate).
|
|
35
|
+
- Anthropic adapter: structured-output passthrough documented
|
|
36
|
+
(`output_config={"format": ...}`) and `strict` tool schemas forwarded.
|
|
37
|
+
- OpenTelemetry tracing (`agentix-toolkit[otel]`): `TracingModel`, `tracing_events`, and
|
|
38
|
+
`trace_run` produce a span tree (run → model/tool spans) for your observability
|
|
39
|
+
stack.
|
|
40
|
+
|
|
9
41
|
## [0.1.0] - 2026-06-22
|
|
10
42
|
|
|
11
43
|
Initial release.
|
|
@@ -43,5 +75,6 @@ Initial release.
|
|
|
43
75
|
`cost_usd`; `AgentPolicy.max_budget_usd` aborts a run over budget.
|
|
44
76
|
- `Interrupt` stops a run or stream at the next safe boundary.
|
|
45
77
|
|
|
46
|
-
[Unreleased]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.
|
|
78
|
+
[Unreleased]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.2.0...HEAD
|
|
79
|
+
[0.2.0]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.1.0...v0.2.0
|
|
47
80
|
[0.1.0]: https://github.com/skwijeratne/agentix-toolkit/releases/tag/v0.1.0
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
|
10
|
+
orientation.
|
|
11
|
+
|
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
13
|
+
diverse, inclusive, and healthy community.
|
|
14
|
+
|
|
15
|
+
## Our Standards
|
|
16
|
+
|
|
17
|
+
Examples of behavior that contributes to a positive environment include:
|
|
18
|
+
|
|
19
|
+
- Demonstrating empathy and kindness toward other people
|
|
20
|
+
- Being respectful of differing opinions, viewpoints, and experiences
|
|
21
|
+
- Giving and gracefully accepting constructive feedback
|
|
22
|
+
- Accepting responsibility and apologizing to those affected by our mistakes,
|
|
23
|
+
and learning from the experience
|
|
24
|
+
- Focusing on what is best not just for us as individuals, but for the overall
|
|
25
|
+
community
|
|
26
|
+
|
|
27
|
+
Examples of unacceptable behavior include:
|
|
28
|
+
|
|
29
|
+
- The use of sexualized language or imagery, and sexual attention or advances of
|
|
30
|
+
any kind
|
|
31
|
+
- Trolling, insulting or derogatory comments, and personal or political attacks
|
|
32
|
+
- Public or private harassment
|
|
33
|
+
- Publishing others' private information, such as a physical or email address,
|
|
34
|
+
without their explicit permission
|
|
35
|
+
- Other conduct which could reasonably be considered inappropriate in a
|
|
36
|
+
professional setting
|
|
37
|
+
|
|
38
|
+
## Enforcement Responsibilities
|
|
39
|
+
|
|
40
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
|
41
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
|
42
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
43
|
+
or harmful.
|
|
44
|
+
|
|
45
|
+
## Scope
|
|
46
|
+
|
|
47
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
|
48
|
+
an individual is officially representing the community in public spaces.
|
|
49
|
+
|
|
50
|
+
## Enforcement
|
|
51
|
+
|
|
52
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
53
|
+
reported to the community leaders responsible for enforcement at
|
|
54
|
+
**skwijeratne@gmail.com**. All complaints will be reviewed and investigated
|
|
55
|
+
promptly and fairly. Community leaders are obligated to respect the privacy and
|
|
56
|
+
security of the reporter of any incident.
|
|
57
|
+
|
|
58
|
+
## Attribution
|
|
59
|
+
|
|
60
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
|
61
|
+
version 2.1, available at
|
|
62
|
+
https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
|
|
63
|
+
|
|
64
|
+
[homepage]: https://www.contributor-covenant.org
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Contributing to agentix
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in improving agentix! This guide gets you set up and
|
|
4
|
+
explains what we look for in a contribution.
|
|
5
|
+
|
|
6
|
+
> The distribution is **`agentix-toolkit`** on PyPI; you import it as **`agentix`**.
|
|
7
|
+
|
|
8
|
+
## Development setup
|
|
9
|
+
|
|
10
|
+
This project uses [uv](https://docs.astral.sh/uv/).
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
git clone https://github.com/skwijeratne/agentix-toolkit
|
|
14
|
+
cd agentix-toolkit
|
|
15
|
+
uv sync --all-extras # create the venv, install deps + dev tools + extras
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## The checks (all three must pass)
|
|
19
|
+
|
|
20
|
+
CI runs these on every PR across Python 3.10–3.13, and they are **blocking**:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uv run pytest # tests
|
|
24
|
+
uv run ruff check src tests # lint
|
|
25
|
+
uv run mypy # type-check (strict)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Run them locally before pushing. Optionally enable the pre-commit hooks so
|
|
29
|
+
lint and type checks run automatically on each commit:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
uv run pre-commit install
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Making a change
|
|
36
|
+
|
|
37
|
+
1. **Open an issue first** for anything non-trivial, so we can agree on the
|
|
38
|
+
approach before you invest time.
|
|
39
|
+
2. Branch off `main`.
|
|
40
|
+
3. Keep the change focused. Match the surrounding style — small, shared core;
|
|
41
|
+
load-bearing behavior is injected and configurable, not baked into the loop.
|
|
42
|
+
4. **Add tests.** New behavior needs coverage; bug fixes need a regression test.
|
|
43
|
+
Tests are plain `def` / `async def test_*` functions (pytest, `asyncio_mode`
|
|
44
|
+
is `auto`).
|
|
45
|
+
5. Update docs where relevant: docstrings, the README, an `examples/` script,
|
|
46
|
+
and a `CHANGELOG.md` entry under `[Unreleased]`.
|
|
47
|
+
6. Make sure all three checks pass.
|
|
48
|
+
7. Open a PR using the template; describe the change and link the issue.
|
|
49
|
+
|
|
50
|
+
## Design principles
|
|
51
|
+
|
|
52
|
+
- **Provider-agnostic core.** Don't couple the loop to a specific model
|
|
53
|
+
provider; provider code lives behind adapters (`providers/`).
|
|
54
|
+
- **Inject, don't bake in.** New capabilities should be opt-in and composable
|
|
55
|
+
(a guard, a tool, a strategy, an executor), not hard-coded into `agent.py`.
|
|
56
|
+
- **Security defaults are conservative.** When a guard is ambiguous, fail
|
|
57
|
+
closed. See `SECURITY.md`.
|
|
58
|
+
- **Typed and tested.** Public APIs are typed (`mypy --strict`) and exercised by
|
|
59
|
+
tests.
|
|
60
|
+
|
|
61
|
+
## Reporting bugs / requesting features
|
|
62
|
+
|
|
63
|
+
Use the issue templates. For **security vulnerabilities, do not open a public
|
|
64
|
+
issue** — see [`SECURITY.md`](./SECURITY.md).
|
|
65
|
+
|
|
66
|
+
By contributing, you agree that your contributions are licensed under the
|
|
67
|
+
project's [MIT License](./LICENSE).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentix-toolkit
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A generic, batteries-included agent toolkit: configure the loop, tools, guards, and observability instead of rewriting them.
|
|
5
5
|
Project-URL: Homepage, https://github.com/skwijeratne/agentix-toolkit
|
|
6
6
|
Project-URL: Repository, https://github.com/skwijeratne/agentix-toolkit
|
|
@@ -22,6 +22,8 @@ Provides-Extra: anthropic
|
|
|
22
22
|
Requires-Dist: anthropic>=0.40; extra == 'anthropic'
|
|
23
23
|
Provides-Extra: mcp
|
|
24
24
|
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
25
|
+
Provides-Extra: otel
|
|
26
|
+
Requires-Dist: opentelemetry-api>=1.20; extra == 'otel'
|
|
25
27
|
Description-Content-Type: text/markdown
|
|
26
28
|
|
|
27
29
|
# agentix
|
|
@@ -53,11 +55,17 @@ outcome = await agent.run("What's the weather in Lisbon?")
|
|
|
53
55
|
|
|
54
56
|
- **Async-first** core loop (`run` / `stream` / `resume`) with a sync wrapper.
|
|
55
57
|
- **Provider-agnostic** — bring any model; a real **Anthropic** adapter is included.
|
|
56
|
-
- **Tools from type hints** — one `@tool` decorator generates the JSON schema
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
- **Tools from type hints** — one `@tool` decorator generates the JSON schema;
|
|
59
|
+
**MCP** servers and **subagents** plug in as tools too.
|
|
60
|
+
- **Security, opt-in** — trust boundary, permission tiers + dynamic
|
|
61
|
+
`can_use_tool` callbacks, PII/injection guards, human confirmation, audit events.
|
|
62
|
+
- **Cost & control** — token **and USD** cost tracking, step/token/USD budgets,
|
|
63
|
+
cooperative `Interrupt`.
|
|
64
|
+
- **Reliability** — output **validation + retry** (`outcome.parsed`), model
|
|
65
|
+
**fallback/retry**, self-consistency, and LLM-as-judge.
|
|
66
|
+
- **Scale & ops** — streaming, checkpoint/resume, context trimming, fleet
|
|
67
|
+
backpressure, an **eval harness** (gate CI on quality), and **OpenTelemetry**
|
|
68
|
+
tracing.
|
|
61
69
|
|
|
62
70
|
> Status: **alpha**, under active development. APIs may change before `1.0`.
|
|
63
71
|
|
|
@@ -72,9 +80,9 @@ The distribution is **`agentix-toolkit`**; you import it as **`agentix`**.
|
|
|
72
80
|
With [uv](https://docs.astral.sh/uv/) (recommended):
|
|
73
81
|
|
|
74
82
|
```bash
|
|
75
|
-
uv add agentix-toolkit
|
|
76
|
-
uv add "agentix-toolkit[anthropic]"
|
|
77
|
-
uv add "agentix-toolkit[anthropic,mcp]"
|
|
83
|
+
uv add agentix-toolkit # core (no required deps)
|
|
84
|
+
uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
|
|
85
|
+
uv add "agentix-toolkit[anthropic,mcp,otel]" # + MCP client + OpenTelemetry tracing
|
|
78
86
|
```
|
|
79
87
|
|
|
80
88
|
Or with pip:
|
|
@@ -83,6 +91,9 @@ Or with pip:
|
|
|
83
91
|
pip install "agentix-toolkit[anthropic]"
|
|
84
92
|
```
|
|
85
93
|
|
|
94
|
+
Extras are opt-in: `anthropic` (the model adapter), `mcp` (MCP client),
|
|
95
|
+
`otel` (OpenTelemetry tracing). The core has **no required dependencies**.
|
|
96
|
+
|
|
86
97
|
### 2. Run an agent with no API key
|
|
87
98
|
|
|
88
99
|
`MockModel` is a scripted, dependency-free model — perfect for trying the loop
|
|
@@ -169,6 +180,30 @@ async for event in agent.stream("Tell me about Lisbon."):
|
|
|
169
180
|
print("\n", event.outcome.status)
|
|
170
181
|
```
|
|
171
182
|
|
|
183
|
+
### 6. Make it production-safe (validate output, fall back, cap cost)
|
|
184
|
+
|
|
185
|
+
Stop malformed output from crashing downstream code: validate the final answer
|
|
186
|
+
and re-prompt on failure. Add a fallback model and a USD budget for resilience.
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from agentix import Agent, AgentPolicy, FallbackModel, json_output
|
|
190
|
+
|
|
191
|
+
agent = Agent(
|
|
192
|
+
model=FallbackModel([primary_model, backup_model]), # survive a provider blip
|
|
193
|
+
system_prompt="Reply with a JSON object.",
|
|
194
|
+
tools=[...],
|
|
195
|
+
output_validator=json_output, # or pydantic_output(MyModel)
|
|
196
|
+
max_output_retries=2, # re-prompt the model on bad output
|
|
197
|
+
policy=AgentPolicy(max_budget_usd=0.50), # abort if it gets expensive
|
|
198
|
+
)
|
|
199
|
+
outcome = await agent.run("...")
|
|
200
|
+
outcome.parsed # a validated object — safe to use; outcome.cost_usd is tracked
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Then **gate quality in CI** with the eval harness — `evaluate(...)` runs your
|
|
204
|
+
agent over golden cases and `assert_pass_rate(...)` fails the build on a
|
|
205
|
+
regression (see `examples/17_eval.py`).
|
|
206
|
+
|
|
172
207
|
---
|
|
173
208
|
|
|
174
209
|
## Feature tour
|
|
@@ -184,6 +219,13 @@ Each links to a runnable example in [`examples/`](./examples):
|
|
|
184
219
|
| Concurrency | `Limiter` + `bounded_gather` for fleets | `10_concurrency.py` |
|
|
185
220
|
| MCP | use any MCP server's tools | `11_mcp.py` |
|
|
186
221
|
| Context | bound the transcript (`TrimRounds`, …) | `12_context.py` |
|
|
222
|
+
| Subagents | delegate a subtask to a child agent | `13_subagents.py` |
|
|
223
|
+
| Cost & interrupt | USD budgets + stop a run mid-flight | `14_cost_and_interrupt.py` |
|
|
224
|
+
| Permissions | dynamic `can_use_tool` + tool allowlist | `15_permissions.py` |
|
|
225
|
+
| Reliability | output validation + retry, fallback/retry models | `16_reliability.py` |
|
|
226
|
+
| Eval | score golden cases, gate CI on pass rate | `17_eval.py` |
|
|
227
|
+
| Verify | self-consistency + LLM-as-judge | `18_verification.py` |
|
|
228
|
+
| Tracing | OpenTelemetry model/tool/run spans | `19_tracing.py` |
|
|
187
229
|
|
|
188
230
|
---
|
|
189
231
|
|
|
@@ -202,6 +244,12 @@ Run an example: `uv run python examples/01_hello_agent.py`.
|
|
|
202
244
|
See [`RELEASING.md`](./RELEASING.md) for the publish process and
|
|
203
245
|
[`PLAN.md`](./PLAN.md) for the roadmap.
|
|
204
246
|
|
|
247
|
+
## Contributing
|
|
248
|
+
|
|
249
|
+
Contributions are welcome! See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for setup
|
|
250
|
+
and the PR checklist, [`CODE_OF_CONDUCT.md`](./CODE_OF_CONDUCT.md), and
|
|
251
|
+
[`SECURITY.md`](./SECURITY.md) for reporting vulnerabilities privately.
|
|
252
|
+
|
|
205
253
|
## License
|
|
206
254
|
|
|
207
255
|
MIT — see [`LICENSE`](./LICENSE).
|
|
@@ -109,8 +109,26 @@ agentix/
|
|
|
109
109
|
- **P10 — Cost + interrupt.** ✅ `pricing` (per-model table + `cost_usd`);
|
|
110
110
|
`ModelResponse`/`AgentOutcome` carry `cost_usd` (Anthropic adapter fills it);
|
|
111
111
|
`AgentPolicy.max_budget_usd` aborts; `Interrupt` stops a run/stream at a safe
|
|
112
|
-
boundary. Tests + example 14.
|
|
113
|
-
|
|
112
|
+
boundary. Tests + example 14.
|
|
113
|
+
- **P8 — Dynamic permissions.** ✅ `CallbackGuard(check)` (`can_use_tool`: a
|
|
114
|
+
per-call callback returning allow/deny/confirm or a bool) and
|
|
115
|
+
`ToolAllowlistGuard` (scope a run to a tool subset). Compose with the guard
|
|
116
|
+
pipeline. Tests + example 15.
|
|
117
|
+
- **P11 — Reliability & correctness.** ✅ Output validation + retry
|
|
118
|
+
(`Agent(output_validator=, max_output_retries=)` → `outcome.parsed`;
|
|
119
|
+
`json_output`/`pydantic_output`/`regex_output`); resilient model wrappers
|
|
120
|
+
(`RetryModel`, `FallbackModel`); `SelfConsistencyModel` (majority vote);
|
|
121
|
+
`JudgeGuard` (LLM answer gate); structured-output passthrough on the Anthropic
|
|
122
|
+
adapter. Examples 16 + 18.
|
|
123
|
+
- **P12 — Eval harness.** ✅ `agentix.evals`: `evaluate(dataset, agent, scorer=)`
|
|
124
|
+
→ `EvalReport` (`pass_rate`, `format_success_rate`, `assert_pass_rate()` to
|
|
125
|
+
gate CI). Scorers: `exact_match`/`contains`/`regex_match`/`predicate`/
|
|
126
|
+
`llm_judge`. Tests + example 17.
|
|
127
|
+
- **P13 — OpenTelemetry tracing.** ✅ `agentix.tracing` (`agentix-toolkit[otel]`):
|
|
128
|
+
`TracingModel` (model spans), `tracing_events()` (tool spans + guard/confirm),
|
|
129
|
+
`trace_run()` (root span). Tests + example 19 (verified vs the real OTel SDK).
|
|
130
|
+
Roadmap remainders (prompt versioning, citation guard, eval loaders) are tracked in
|
|
131
|
+
`PLAN.gaps.md`.
|
|
114
132
|
|
|
115
133
|
> ⚠️ Streaming caveat: `on_answer` egress guards (PII redaction) can't un-send
|
|
116
134
|
> already-streamed deltas — deltas are raw; `Done.outcome.answer` is redacted.
|
|
@@ -27,11 +27,17 @@ outcome = await agent.run("What's the weather in Lisbon?")
|
|
|
27
27
|
|
|
28
28
|
- **Async-first** core loop (`run` / `stream` / `resume`) with a sync wrapper.
|
|
29
29
|
- **Provider-agnostic** — bring any model; a real **Anthropic** adapter is included.
|
|
30
|
-
- **Tools from type hints** — one `@tool` decorator generates the JSON schema
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
30
|
+
- **Tools from type hints** — one `@tool` decorator generates the JSON schema;
|
|
31
|
+
**MCP** servers and **subagents** plug in as tools too.
|
|
32
|
+
- **Security, opt-in** — trust boundary, permission tiers + dynamic
|
|
33
|
+
`can_use_tool` callbacks, PII/injection guards, human confirmation, audit events.
|
|
34
|
+
- **Cost & control** — token **and USD** cost tracking, step/token/USD budgets,
|
|
35
|
+
cooperative `Interrupt`.
|
|
36
|
+
- **Reliability** — output **validation + retry** (`outcome.parsed`), model
|
|
37
|
+
**fallback/retry**, self-consistency, and LLM-as-judge.
|
|
38
|
+
- **Scale & ops** — streaming, checkpoint/resume, context trimming, fleet
|
|
39
|
+
backpressure, an **eval harness** (gate CI on quality), and **OpenTelemetry**
|
|
40
|
+
tracing.
|
|
35
41
|
|
|
36
42
|
> Status: **alpha**, under active development. APIs may change before `1.0`.
|
|
37
43
|
|
|
@@ -46,9 +52,9 @@ The distribution is **`agentix-toolkit`**; you import it as **`agentix`**.
|
|
|
46
52
|
With [uv](https://docs.astral.sh/uv/) (recommended):
|
|
47
53
|
|
|
48
54
|
```bash
|
|
49
|
-
uv add agentix-toolkit
|
|
50
|
-
uv add "agentix-toolkit[anthropic]"
|
|
51
|
-
uv add "agentix-toolkit[anthropic,mcp]"
|
|
55
|
+
uv add agentix-toolkit # core (no required deps)
|
|
56
|
+
uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
|
|
57
|
+
uv add "agentix-toolkit[anthropic,mcp,otel]" # + MCP client + OpenTelemetry tracing
|
|
52
58
|
```
|
|
53
59
|
|
|
54
60
|
Or with pip:
|
|
@@ -57,6 +63,9 @@ Or with pip:
|
|
|
57
63
|
pip install "agentix-toolkit[anthropic]"
|
|
58
64
|
```
|
|
59
65
|
|
|
66
|
+
Extras are opt-in: `anthropic` (the model adapter), `mcp` (MCP client),
|
|
67
|
+
`otel` (OpenTelemetry tracing). The core has **no required dependencies**.
|
|
68
|
+
|
|
60
69
|
### 2. Run an agent with no API key
|
|
61
70
|
|
|
62
71
|
`MockModel` is a scripted, dependency-free model — perfect for trying the loop
|
|
@@ -143,6 +152,30 @@ async for event in agent.stream("Tell me about Lisbon."):
|
|
|
143
152
|
print("\n", event.outcome.status)
|
|
144
153
|
```
|
|
145
154
|
|
|
155
|
+
### 6. Make it production-safe (validate output, fall back, cap cost)
|
|
156
|
+
|
|
157
|
+
Stop malformed output from crashing downstream code: validate the final answer
|
|
158
|
+
and re-prompt on failure. Add a fallback model and a USD budget for resilience.
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from agentix import Agent, AgentPolicy, FallbackModel, json_output
|
|
162
|
+
|
|
163
|
+
agent = Agent(
|
|
164
|
+
model=FallbackModel([primary_model, backup_model]), # survive a provider blip
|
|
165
|
+
system_prompt="Reply with a JSON object.",
|
|
166
|
+
tools=[...],
|
|
167
|
+
output_validator=json_output, # or pydantic_output(MyModel)
|
|
168
|
+
max_output_retries=2, # re-prompt the model on bad output
|
|
169
|
+
policy=AgentPolicy(max_budget_usd=0.50), # abort if it gets expensive
|
|
170
|
+
)
|
|
171
|
+
outcome = await agent.run("...")
|
|
172
|
+
outcome.parsed # a validated object — safe to use; outcome.cost_usd is tracked
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Then **gate quality in CI** with the eval harness — `evaluate(...)` runs your
|
|
176
|
+
agent over golden cases and `assert_pass_rate(...)` fails the build on a
|
|
177
|
+
regression (see `examples/17_eval.py`).
|
|
178
|
+
|
|
146
179
|
---
|
|
147
180
|
|
|
148
181
|
## Feature tour
|
|
@@ -158,6 +191,13 @@ Each links to a runnable example in [`examples/`](./examples):
|
|
|
158
191
|
| Concurrency | `Limiter` + `bounded_gather` for fleets | `10_concurrency.py` |
|
|
159
192
|
| MCP | use any MCP server's tools | `11_mcp.py` |
|
|
160
193
|
| Context | bound the transcript (`TrimRounds`, …) | `12_context.py` |
|
|
194
|
+
| Subagents | delegate a subtask to a child agent | `13_subagents.py` |
|
|
195
|
+
| Cost & interrupt | USD budgets + stop a run mid-flight | `14_cost_and_interrupt.py` |
|
|
196
|
+
| Permissions | dynamic `can_use_tool` + tool allowlist | `15_permissions.py` |
|
|
197
|
+
| Reliability | output validation + retry, fallback/retry models | `16_reliability.py` |
|
|
198
|
+
| Eval | score golden cases, gate CI on pass rate | `17_eval.py` |
|
|
199
|
+
| Verify | self-consistency + LLM-as-judge | `18_verification.py` |
|
|
200
|
+
| Tracing | OpenTelemetry model/tool/run spans | `19_tracing.py` |
|
|
161
201
|
|
|
162
202
|
---
|
|
163
203
|
|
|
@@ -176,6 +216,12 @@ Run an example: `uv run python examples/01_hello_agent.py`.
|
|
|
176
216
|
See [`RELEASING.md`](./RELEASING.md) for the publish process and
|
|
177
217
|
[`PLAN.md`](./PLAN.md) for the roadmap.
|
|
178
218
|
|
|
219
|
+
## Contributing
|
|
220
|
+
|
|
221
|
+
Contributions are welcome! See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for setup
|
|
222
|
+
and the PR checklist, [`CODE_OF_CONDUCT.md`](./CODE_OF_CONDUCT.md) for community standards, and
|
|
223
|
+
[`SECURITY.md`](./SECURITY.md) for reporting vulnerabilities privately.
|
|
224
|
+
|
|
179
225
|
## License
|
|
180
226
|
|
|
181
227
|
MIT — see [`LICENSE`](./LICENSE).
|