agentix-toolkit 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/CHANGELOG.md +49 -1
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/PKG-INFO +36 -7
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/PLAN.md +4 -2
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/README.md +25 -6
- agentix_toolkit-0.3.0/examples/20_prompts.py +47 -0
- agentix_toolkit-0.3.0/examples/21_providers.py +84 -0
- agentix_toolkit-0.3.0/examples/22_multimodal.py +71 -0
- agentix_toolkit-0.3.0/examples/23_sandbox.py +86 -0
- agentix_toolkit-0.3.0/examples/24_suspend_resume.py +87 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/README.md +5 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/pyproject.toml +14 -2
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/__init__.py +35 -1
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/agent.py +119 -18
- agentix_toolkit-0.3.0/src/agentix/content.py +159 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/context.py +1 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/events.py +1 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/executors.py +5 -2
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/pricing.py +14 -1
- agentix_toolkit-0.3.0/src/agentix/prompts.py +110 -0
- agentix_toolkit-0.3.0/src/agentix/providers/__init__.py +26 -0
- agentix_toolkit-0.3.0/src/agentix/providers/_openai_compat.py +176 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/providers/anthropic.py +108 -9
- agentix_toolkit-0.3.0/src/agentix/providers/bedrock.py +212 -0
- agentix_toolkit-0.3.0/src/agentix/providers/gemini.py +185 -0
- agentix_toolkit-0.3.0/src/agentix/providers/litellm.py +67 -0
- agentix_toolkit-0.3.0/src/agentix/providers/ollama.py +144 -0
- agentix_toolkit-0.3.0/src/agentix/providers/openai.py +155 -0
- agentix_toolkit-0.3.0/src/agentix/sandbox.py +282 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/serde.py +12 -2
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/types.py +32 -3
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_anthropic_adapter.py +64 -0
- agentix_toolkit-0.3.0/tests/test_bedrock_adapter.py +99 -0
- agentix_toolkit-0.3.0/tests/test_content.py +101 -0
- agentix_toolkit-0.3.0/tests/test_gemini_adapter.py +105 -0
- agentix_toolkit-0.3.0/tests/test_litellm_adapter.py +78 -0
- agentix_toolkit-0.3.0/tests/test_multimodal_adapters.py +200 -0
- agentix_toolkit-0.3.0/tests/test_ollama_adapter.py +75 -0
- agentix_toolkit-0.3.0/tests/test_openai_adapter.py +155 -0
- agentix_toolkit-0.3.0/tests/test_prompts.py +73 -0
- agentix_toolkit-0.3.0/tests/test_sandbox.py +163 -0
- agentix_toolkit-0.3.0/tests/test_suspend.py +168 -0
- agentix_toolkit-0.3.0/uv.lock +3140 -0
- agentix_toolkit-0.2.1/src/agentix/providers/__init__.py +0 -8
- agentix_toolkit-0.2.1/uv.lock +0 -1454
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.editorconfig +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/dependabot.yml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/workflows/ci.yml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.github/workflows/release.yml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.gitignore +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.pre-commit-config.yaml +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/.python-version +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/CODE_OF_CONDUCT.md +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/CONTRIBUTING.md +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/LICENSE +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/RELEASING.md +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/SECURITY.md +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/01_hello_agent.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/02_tool_use.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/03_async_dynamic_loop.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/04_policy_and_trust.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/05_anthropic_model.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/06_tool_decorator.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/07_guards.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/08_persistence.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/09_streaming.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/10_concurrency.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/11_mcp.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/12_context.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/13_subagents.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/14_cost_and_interrupt.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/15_permissions.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/16_reliability.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/17_eval.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/18_verification.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/examples/19_tracing.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/concurrency.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/confirm.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/consistency.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/control.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/errors.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/evals.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/__init__.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/base.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/injection.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/judge.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/permissions.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/pii.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/tiers.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/guards/trust.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/mcp.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/model.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/policy.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/providers/mock.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/py.typed +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/resilience.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/store.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/streaming.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/subagents.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/tools.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/tracing.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/src/agentix/validation.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_agent.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_concurrency.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_consistency.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_context.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_cost_and_interrupt.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_evals.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_guards.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_judge.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_mcp.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_permissions.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_persistence.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_resilience.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_streaming.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_subagents.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_tools.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_tracing.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_types.py +0 -0
- {agentix_toolkit-0.2.1 → agentix_toolkit-0.3.0}/tests/test_validation.py +0 -0
|
@@ -6,6 +6,53 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.3.0] - 2026-06-23
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- Suspendable human-in-the-loop (P18) — `Agent(suspend_on_confirm=True)` pauses a
|
|
13
|
+
run when a tool needs confirmation instead of awaiting `confirm_fn` inline: it
|
|
14
|
+
checkpoints (with the assistant tool-turn as the tail) and returns
|
|
15
|
+
`AgentOutcome(status="suspended", pending=[PendingApproval(...)])`. A later
|
|
16
|
+
`resume(run_id, decisions={call_id: bool})` — on the same or a brand-new Agent,
|
|
17
|
+
since the state lives in the store — finishes that turn (approve/deny;
|
|
18
|
+
undecided pending calls fail closed) and continues. Requires a store + run_id;
|
|
19
|
+
adds the `on_suspend` event and the `PendingApproval` type. Built for
|
|
20
|
+
web/serverless flows where the request coroutine can't block. See
|
|
21
|
+
`examples/24_suspend_resume.py`.
|
|
22
|
+
- Sandboxed execution (P16) — `SubprocessExecutor` (a `ToolExecutor`) runs each
|
|
23
|
+
tool as a separate OS process and actually enforces the limits the loop passes:
|
|
24
|
+
network egress is **denied when `network_allowlist` is empty** (Linux network
|
|
25
|
+
namespace via `unshare`, auto-detected; **fails closed** if it can't isolate,
|
|
26
|
+
unless `require_network_isolation=False`), plus POSIX CPU/memory/file-size/
|
|
27
|
+
process rlimits, a fresh per-call temp working directory, a scrubbed
|
|
28
|
+
environment (no parent secrets leak), an output cap, and a timeout that kills
|
|
29
|
+
the process group. Ships `SandboxPolicy` and `Command`. This closes the gap
|
|
30
|
+
where `LocalToolExecutor` ignored `network_allowlist`. See
|
|
31
|
+
`examples/23_sandbox.py`.
|
|
32
|
+
- Multimodal input (P15) — `Message.content` is now `str | list[ContentPart]`,
|
|
33
|
+
with `TextPart`, `ImagePart`, `DocumentPart`, and `AudioPart` (build via
|
|
34
|
+
`from_path` / `from_bytes` / `from_base64` / `from_url`). `Message.text` gives
|
|
35
|
+
a string view. `Agent.run`/`run_sync`/`stream` accept a parts list anywhere a
|
|
36
|
+
string request goes. Every adapter translates supported media to its provider
|
|
37
|
+
format and raises a clear error otherwise (e.g. audio on Anthropic, URL images
|
|
38
|
+
on Bedrock). Plain-string content is fully backward compatible. See
|
|
39
|
+
`examples/22_multimodal.py`.
|
|
40
|
+
- Multi-provider adapters (P14) — the toolkit now ships five more model
|
|
41
|
+
backends alongside Anthropic, each behind its own extra and each a drop-in
|
|
42
|
+
`ModelFn`: `OpenAIModel` (`agentix[openai]`; Chat Completions, also drives any
|
|
43
|
+
OpenAI-compatible `base_url`, with streaming), `GeminiModel`
|
|
44
|
+
(`agentix[gemini]`), `BedrockModel` (`agentix[bedrock]`; AWS Converse API, run
|
|
45
|
+
off-thread), `OllamaModel` (`agentix[ollama]`; local models), and `LiteLLMModel`
|
|
46
|
+
(`agentix[litellm]`; one bridge to 100+ providers). Best-effort pricing added
|
|
47
|
+
for common OpenAI/Gemini models (override with `register_price`). See
|
|
48
|
+
`examples/21_providers.py`.
|
|
49
|
+
- `AnthropicModel` typed reasoning/cost knobs: `thinking` (`True`/`"adaptive"`/
|
|
50
|
+
`"summarized"`/`"disabled"`/dict), `effort` (`low`…`max`), and `task_budget`
|
|
51
|
+
(int; adds the required beta header) — previously only via opaque `extra`.
|
|
52
|
+
Docstring documents refusal-fallback behavior.
|
|
53
|
+
- `PromptRegistry`: lightweight in-process prompt versioning with `register` /
|
|
54
|
+
`get` / `rollback` / `render` and `to_dict`/`from_dict` persistence.
|
|
55
|
+
|
|
9
56
|
## [0.2.1] - 2026-06-23
|
|
10
57
|
|
|
11
58
|
### Fixed
|
|
@@ -82,7 +129,8 @@ Initial release.
|
|
|
82
129
|
`cost_usd`; `AgentPolicy.max_budget_usd` aborts a run over budget.
|
|
83
130
|
- `Interrupt` stops a run or stream at the next safe boundary.
|
|
84
131
|
|
|
85
|
-
[Unreleased]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.2.1...HEAD
|
|
132
|
+
[Unreleased]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.3.0...HEAD
|
|
133
|
+
[0.3.0]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.2.1...v0.3.0
|
|
86
134
|
[0.2.1]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.2.0...v0.2.1
|
|
87
135
|
[0.2.0]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.1.0...v0.2.0
|
|
88
136
|
[0.1.0]: https://github.com/skwijeratne/agentix-toolkit/releases/tag/v0.1.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agentix-toolkit
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A generic, batteries-included agent toolkit: configure the loop, tools, guards, and observability instead of rewriting them.
|
|
5
5
|
Project-URL: Homepage, https://github.com/skwijeratne/agentix-toolkit
|
|
6
6
|
Project-URL: Repository, https://github.com/skwijeratne/agentix-toolkit
|
|
@@ -20,8 +20,18 @@ Classifier: Typing :: Typed
|
|
|
20
20
|
Requires-Python: >=3.10
|
|
21
21
|
Provides-Extra: anthropic
|
|
22
22
|
Requires-Dist: anthropic>=0.40; extra == 'anthropic'
|
|
23
|
+
Provides-Extra: bedrock
|
|
24
|
+
Requires-Dist: boto3>=1.34; extra == 'bedrock'
|
|
25
|
+
Provides-Extra: gemini
|
|
26
|
+
Requires-Dist: google-genai>=0.3; extra == 'gemini'
|
|
27
|
+
Provides-Extra: litellm
|
|
28
|
+
Requires-Dist: litellm>=1.40; extra == 'litellm'
|
|
23
29
|
Provides-Extra: mcp
|
|
24
30
|
Requires-Dist: mcp>=1.0; extra == 'mcp'
|
|
31
|
+
Provides-Extra: ollama
|
|
32
|
+
Requires-Dist: ollama>=0.3; extra == 'ollama'
|
|
33
|
+
Provides-Extra: openai
|
|
34
|
+
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
25
35
|
Provides-Extra: otel
|
|
26
36
|
Requires-Dist: opentelemetry-api>=1.20; extra == 'otel'
|
|
27
37
|
Description-Content-Type: text/markdown
|
|
@@ -54,18 +64,29 @@ outcome = await agent.run("What's the weather in Lisbon?")
|
|
|
54
64
|
```
|
|
55
65
|
|
|
56
66
|
- **Async-first** core loop (`run` / `stream` / `resume`) with a sync wrapper.
|
|
57
|
-
- **Provider-agnostic** — bring any model
|
|
67
|
+
- **Provider-agnostic** — bring any model, or use a shipped adapter:
|
|
68
|
+
**Anthropic**, **OpenAI** (+ any OpenAI-compatible URL), **Gemini**,
|
|
69
|
+
**Bedrock**, **Ollama** (local), and a **LiteLLM** bridge (100+ providers).
|
|
58
70
|
- **Tools from type hints** — one `@tool` decorator generates the JSON schema;
|
|
59
71
|
**MCP** servers and **subagents** plug in as tools too.
|
|
72
|
+
- **Multimodal input** — a message is a string *or* a list of parts: text plus
|
|
73
|
+
**images / PDFs / audio**, translated per adapter (clear errors for what a
|
|
74
|
+
given provider can't accept).
|
|
60
75
|
- **Security, opt-in** — trust boundary, permission tiers + dynamic
|
|
61
|
-
`can_use_tool` callbacks, PII/injection guards, human confirmation, audit events
|
|
76
|
+
`can_use_tool` callbacks, PII/injection guards, human confirmation, audit events,
|
|
77
|
+
and a **sandboxed executor** that runs untrusted / model-generated code in an
|
|
78
|
+
isolated subprocess (no network by default, plus CPU/memory/fs limits).
|
|
62
79
|
- **Cost & control** — token **and USD** cost tracking, step/token/USD budgets,
|
|
63
80
|
cooperative `Interrupt`.
|
|
81
|
+
- **Human-in-the-loop, durably** — `suspend_on_confirm` pauses at a confirmation,
|
|
82
|
+
persists, and returns `status="suspended"`; `resume(run_id, decisions=…)`
|
|
83
|
+
approves/denies on a later request (web/serverless-friendly), not just an
|
|
84
|
+
inline blocking prompt.
|
|
64
85
|
- **Reliability** — output **validation + retry** (`outcome.parsed`), model
|
|
65
86
|
**fallback/retry**, self-consistency, and LLM-as-judge.
|
|
66
87
|
- **Scale & ops** — streaming, checkpoint/resume, context trimming, fleet
|
|
67
|
-
backpressure, an **eval harness** (gate CI on quality),
|
|
68
|
-
tracing.
|
|
88
|
+
backpressure, an **eval harness** (gate CI on quality), **OpenTelemetry**
|
|
89
|
+
tracing, and **prompt versioning** (roll back a regressed prompt).
|
|
69
90
|
|
|
70
91
|
> Status: **alpha**, under active development. APIs may change before `1.0`.
|
|
71
92
|
|
|
@@ -82,6 +103,7 @@ With [uv](https://docs.astral.sh/uv/) (recommended):
|
|
|
82
103
|
```bash
|
|
83
104
|
uv add agentix-toolkit # core (no required deps)
|
|
84
105
|
uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
|
|
106
|
+
uv add "agentix-toolkit[openai]" # + OpenAI adapter (pick your provider)
|
|
85
107
|
uv add "agentix-toolkit[anthropic,mcp,otel]" # + MCP client + OpenTelemetry tracing
|
|
86
108
|
```
|
|
87
109
|
|
|
@@ -91,8 +113,10 @@ Or with pip:
|
|
|
91
113
|
pip install "agentix-toolkit[anthropic]"
|
|
92
114
|
```
|
|
93
115
|
|
|
94
|
-
Extras are opt-in
|
|
95
|
-
`
|
|
116
|
+
Extras are opt-in and the core has **no required dependencies**. Provider
|
|
117
|
+
adapters: `anthropic`, `openai`, `gemini`, `bedrock`, `ollama`, `litellm`
|
|
118
|
+
(the LiteLLM bridge reaches 100+ providers on its own). Plus `mcp` (MCP client)
|
|
119
|
+
and `otel` (OpenTelemetry tracing).
|
|
96
120
|
|
|
97
121
|
### 2. Run an agent with no API key
|
|
98
122
|
|
|
@@ -226,6 +250,11 @@ Each links to a runnable example in [`examples/`](./examples):
|
|
|
226
250
|
| Eval | score golden cases, gate CI on pass rate | `17_eval.py` |
|
|
227
251
|
| Verify | self-consistency + LLM-as-judge | `18_verification.py` |
|
|
228
252
|
| Tracing | OpenTelemetry model/tool/run spans | `19_tracing.py` |
|
|
253
|
+
| Prompts | versioning + rollback; typed Anthropic reasoning knobs | `20_prompts.py` |
|
|
254
|
+
| Providers | OpenAI / Gemini / Bedrock / Ollama / LiteLLM, one-line swap | `21_providers.py` |
|
|
255
|
+
| Multimodal | text + image / PDF / audio parts; per-adapter translation | `22_multimodal.py` |
|
|
256
|
+
| Sandbox | run untrusted code in an isolated subprocess (no net, rlimits) | `23_sandbox.py` |
|
|
257
|
+
| Suspend/resume | pause for human approval, persist, resume on a later request | `24_suspend_resume.py` |
|
|
229
258
|
|
|
230
259
|
---
|
|
231
260
|
|
|
@@ -127,8 +127,10 @@ agentix/
|
|
|
127
127
|
- **P13 — OpenTelemetry tracing.** ✅ `agentix.tracing` (`agentix[otel]`):
|
|
128
128
|
`TracingModel` (model spans), `tracing_events()` (tool spans + guard/confirm),
|
|
129
129
|
`trace_run()` (root span). Tests + example 19 (verified vs the real OTel SDK).
|
|
130
|
-
|
|
131
|
-
`
|
|
130
|
+
- **Polish.** ✅ Typed `thinking`/`effort`/`task_budget` on `AnthropicModel`
|
|
131
|
+
(+ refusal-fallback docs); `PromptRegistry` (prompt versioning + rollback).
|
|
132
|
+
Example 20. The "perfect toolkit" frontier (P14+: more providers, multi-modal,
|
|
133
|
+
sandboxed executor, docs site, …) is tracked in `PLAN.gaps.md`.
|
|
132
134
|
|
|
133
135
|
> ⚠️ Streaming caveat: `on_answer` egress guards (PII redaction) can't un-send
|
|
134
136
|
> already-streamed deltas — deltas are raw; `Done.outcome.answer` is redacted.
|
|
@@ -26,18 +26,29 @@ outcome = await agent.run("What's the weather in Lisbon?")
|
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
- **Async-first** core loop (`run` / `stream` / `resume`) with a sync wrapper.
|
|
29
|
-
- **Provider-agnostic** — bring any model
|
|
29
|
+
- **Provider-agnostic** — bring any model, or use a shipped adapter:
|
|
30
|
+
**Anthropic**, **OpenAI** (+ any OpenAI-compatible URL), **Gemini**,
|
|
31
|
+
**Bedrock**, **Ollama** (local), and a **LiteLLM** bridge (100+ providers).
|
|
30
32
|
- **Tools from type hints** — one `@tool` decorator generates the JSON schema;
|
|
31
33
|
**MCP** servers and **subagents** plug in as tools too.
|
|
34
|
+
- **Multimodal input** — a message is a string *or* a list of parts: text plus
|
|
35
|
+
**images / PDFs / audio**, translated per adapter (clear errors for what a
|
|
36
|
+
given provider can't accept).
|
|
32
37
|
- **Security, opt-in** — trust boundary, permission tiers + dynamic
|
|
33
|
-
`can_use_tool` callbacks, PII/injection guards, human confirmation, audit events
|
|
38
|
+
`can_use_tool` callbacks, PII/injection guards, human confirmation, audit events,
|
|
39
|
+
and a **sandboxed executor** that runs untrusted / model-generated code in an
|
|
40
|
+
isolated subprocess (no network by default, plus CPU/memory/fs limits).
|
|
34
41
|
- **Cost & control** — token **and USD** cost tracking, step/token/USD budgets,
|
|
35
42
|
cooperative `Interrupt`.
|
|
43
|
+
- **Human-in-the-loop, durably** — `suspend_on_confirm` pauses at a confirmation,
|
|
44
|
+
persists, and returns `status="suspended"`; `resume(run_id, decisions=…)`
|
|
45
|
+
approves/denies on a later request (web/serverless-friendly), not just an
|
|
46
|
+
inline blocking prompt.
|
|
36
47
|
- **Reliability** — output **validation + retry** (`outcome.parsed`), model
|
|
37
48
|
**fallback/retry**, self-consistency, and LLM-as-judge.
|
|
38
49
|
- **Scale & ops** — streaming, checkpoint/resume, context trimming, fleet
|
|
39
|
-
backpressure, an **eval harness** (gate CI on quality),
|
|
40
|
-
tracing.
|
|
50
|
+
backpressure, an **eval harness** (gate CI on quality), **OpenTelemetry**
|
|
51
|
+
tracing, and **prompt versioning** (roll back a regressed prompt).
|
|
41
52
|
|
|
42
53
|
> Status: **alpha**, under active development. APIs may change before `1.0`.
|
|
43
54
|
|
|
@@ -54,6 +65,7 @@ With [uv](https://docs.astral.sh/uv/) (recommended):
|
|
|
54
65
|
```bash
|
|
55
66
|
uv add agentix-toolkit # core (no required deps)
|
|
56
67
|
uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
|
|
68
|
+
uv add "agentix-toolkit[openai]" # + OpenAI adapter (pick your provider)
|
|
57
69
|
uv add "agentix-toolkit[anthropic,mcp,otel]" # + MCP client + OpenTelemetry tracing
|
|
58
70
|
```
|
|
59
71
|
|
|
@@ -63,8 +75,10 @@ Or with pip:
|
|
|
63
75
|
pip install "agentix-toolkit[anthropic]"
|
|
64
76
|
```
|
|
65
77
|
|
|
66
|
-
Extras are opt-in
|
|
67
|
-
`
|
|
78
|
+
Extras are opt-in and the core has **no required dependencies**. Provider
|
|
79
|
+
adapters: `anthropic`, `openai`, `gemini`, `bedrock`, `ollama`, `litellm`
|
|
80
|
+
(the LiteLLM bridge reaches 100+ providers on its own). Plus `mcp` (MCP client)
|
|
81
|
+
and `otel` (OpenTelemetry tracing).
|
|
68
82
|
|
|
69
83
|
### 2. Run an agent with no API key
|
|
70
84
|
|
|
@@ -198,6 +212,11 @@ Each links to a runnable example in [`examples/`](./examples):
|
|
|
198
212
|
| Eval | score golden cases, gate CI on pass rate | `17_eval.py` |
|
|
199
213
|
| Verify | self-consistency + LLM-as-judge | `18_verification.py` |
|
|
200
214
|
| Tracing | OpenTelemetry model/tool/run spans | `19_tracing.py` |
|
|
215
|
+
| Prompts | versioning + rollback; typed Anthropic reasoning knobs | `20_prompts.py` |
|
|
216
|
+
| Providers | OpenAI / Gemini / Bedrock / Ollama / LiteLLM, one-line swap | `21_providers.py` |
|
|
217
|
+
| Multimodal | text + image / PDF / audio parts; per-adapter translation | `22_multimodal.py` |
|
|
218
|
+
| Sandbox | run untrusted code in an isolated subprocess (no net, rlimits) | `23_sandbox.py` |
|
|
219
|
+
| Suspend/resume | pause for human approval, persist, resume on a later request | `24_suspend_resume.py` |
|
|
201
220
|
|
|
202
221
|
---
|
|
203
222
|
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""20 — Prompt registry & versioning (+ provider reasoning knobs).
|
|
2
|
+
|
|
3
|
+
`PromptRegistry` keeps named prompts under version control so you can roll back a
|
|
4
|
+
change that regressed. Combine it with the eval harness to compare versions.
|
|
5
|
+
|
|
6
|
+
Also shown: the typed reasoning/cost knobs on the Anthropic adapter
|
|
7
|
+
(`thinking` / `effort` / `task_budget`) — configured below but not called (no API
|
|
8
|
+
key needed for this demo).
|
|
9
|
+
|
|
10
|
+
Run:
|
|
11
|
+
PYTHONPATH=src python examples/20_prompts.py
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from agentix import PromptRegistry
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main() -> None:
|
|
20
|
+
prompts = PromptRegistry()
|
|
21
|
+
|
|
22
|
+
v1 = prompts.register("assistant", "You are a helpful assistant.")
|
|
23
|
+
v2 = prompts.register("assistant", "You are a terse, snarky assistant.") # regressed
|
|
24
|
+
print(f"registered v{v1} and v{v2}; active = v{prompts.active('assistant')}")
|
|
25
|
+
print("active prompt:", prompts.get("assistant"))
|
|
26
|
+
|
|
27
|
+
# The v2 change tanked your eval pass-rate — roll back.
|
|
28
|
+
prompts.rollback("assistant", v1)
|
|
29
|
+
print(f"\nrolled back; active = v{prompts.active('assistant')}")
|
|
30
|
+
print("active prompt:", prompts.get("assistant"))
|
|
31
|
+
|
|
32
|
+
# Templating + persistence.
|
|
33
|
+
prompts.register("greet", "Hello {name}, welcome to {product}.")
|
|
34
|
+
print("\nrendered:", prompts.render("greet", name="Sam", product="agentix"))
|
|
35
|
+
blob = prompts.to_dict() # persist via a Store / JSON
|
|
36
|
+
restored = PromptRegistry.from_dict(blob)
|
|
37
|
+
print("restored active 'assistant':", restored.get("assistant"))
|
|
38
|
+
|
|
39
|
+
# --- reasoning / cost knobs on the Anthropic adapter (config only) ---
|
|
40
|
+
print("\nAnthropic reasoning/cost knobs (typed, no opaque extra):")
|
|
41
|
+
print(" AnthropicModel(thinking='summarized', effort='low') # cheaper, shows reasoning")
|
|
42
|
+
print(" AnthropicModel(effort='xhigh') # max quality for hard tasks")
|
|
43
|
+
print(" AnthropicModel(task_budget=50_000) # self-moderated loop budget")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
if __name__ == "__main__":
|
|
47
|
+
main()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""21 — One agent, many providers.
|
|
2
|
+
|
|
3
|
+
`agentix` is provider-agnostic: the loop, tools, and guards are identical no
|
|
4
|
+
matter which model backs them. Swapping providers is a one-line change to the
|
|
5
|
+
`model=` argument. This example is the gallery of those one-liners, plus a
|
|
6
|
+
dependency-free demo proving the loop itself doesn't care which you pick.
|
|
7
|
+
|
|
8
|
+
Each adapter defers its SDK import to construction, so importing the classes is
|
|
9
|
+
always safe; you only need the matching extra to actually *run* one:
|
|
10
|
+
|
|
11
|
+
pip install "agentix-toolkit[openai]" # OpenAIModel (+ any OpenAI-compatible URL)
|
|
12
|
+
pip install "agentix-toolkit[gemini]" # GeminiModel
|
|
13
|
+
pip install "agentix-toolkit[bedrock]" # BedrockModel (AWS Converse API)
|
|
14
|
+
pip install "agentix-toolkit[ollama]" # OllamaModel (local models)
|
|
15
|
+
pip install "agentix-toolkit[litellm]" # LiteLLMModel (100+ providers via one bridge)
|
|
16
|
+
|
|
17
|
+
Run:
|
|
18
|
+
python examples/21_providers.py
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
|
|
25
|
+
from agentix import Agent, MockModel, ModelResponse, ToolCall, tool
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@tool
|
|
29
|
+
def get_weather(city: str) -> str:
|
|
30
|
+
"""Get the current weather for a city.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
city: City name, e.g. 'Paris'.
|
|
34
|
+
"""
|
|
35
|
+
return f"{city}: 21C, partly cloudy."
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ── How you'd construct each provider (copy the line you need) ───────────────
|
|
39
|
+
#
|
|
40
|
+
# from agentix.providers.openai import OpenAIModel
|
|
41
|
+
# model = OpenAIModel(model="gpt-4o") # reads OPENAI_API_KEY
|
|
42
|
+
# model = OpenAIModel(base_url="http://localhost:11434/v1", api_key="ollama",
|
|
43
|
+
# model="llama3.1") # any OpenAI-compatible URL
|
|
44
|
+
#
|
|
45
|
+
# from agentix.providers.gemini import GeminiModel
|
|
46
|
+
# model = GeminiModel(model="gemini-2.0-flash") # reads GOOGLE_API_KEY
|
|
47
|
+
#
|
|
48
|
+
# from agentix.providers.bedrock import BedrockModel
|
|
49
|
+
# model = BedrockModel(model="anthropic.claude-3-5-sonnet-20241022-v2:0")
|
|
50
|
+
# # uses the AWS cred chain
|
|
51
|
+
# from agentix.providers.ollama import OllamaModel
|
|
52
|
+
# model = OllamaModel(model="llama3.1") # local; needs `ollama serve`
|
|
53
|
+
#
|
|
54
|
+
# from agentix.providers.litellm import LiteLLMModel
|
|
55
|
+
# model = LiteLLMModel(model="anthropic/claude-opus-4-8") # provider-prefixed ids
|
|
56
|
+
#
|
|
57
|
+
# Then it's the same Agent for every one of them:
|
|
58
|
+
#
|
|
59
|
+
# agent = Agent(model=model, system_prompt="...", tools=[get_weather])
|
|
60
|
+
# outcome = await agent.run("What's the weather in Paris?")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
async def main() -> None:
|
|
64
|
+
# The dependency-free proof: a scripted MockModel drives the exact same loop
|
|
65
|
+
# (tool call -> tool result -> final answer) that every real adapter drives.
|
|
66
|
+
model = MockModel(
|
|
67
|
+
[
|
|
68
|
+
ModelResponse(tool_calls=[ToolCall("get_weather", {"city": "Paris"})]),
|
|
69
|
+
ModelResponse(text="It's 21C and partly cloudy in Paris."),
|
|
70
|
+
]
|
|
71
|
+
)
|
|
72
|
+
agent = Agent(
|
|
73
|
+
model=model,
|
|
74
|
+
system_prompt="You are a concise weather assistant.",
|
|
75
|
+
tools=[get_weather],
|
|
76
|
+
)
|
|
77
|
+
outcome = await agent.run("What's the weather in Paris?")
|
|
78
|
+
|
|
79
|
+
print("answer:", outcome.answer)
|
|
80
|
+
print("(swap `model=` for any provider above — nothing else changes.)")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
if __name__ == "__main__":
|
|
84
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""22 — Multimodal input (vision, documents, audio).
|
|
2
|
+
|
|
3
|
+
A message's content can be a plain string *or* a list of parts: text interleaved
|
|
4
|
+
with images, documents (PDF), and audio. You build the parts the same way for
|
|
5
|
+
every provider; each adapter translates them to that vendor's wire format (and
|
|
6
|
+
raises a clear error for media a provider can't accept — e.g. audio on Anthropic,
|
|
7
|
+
URL images on Bedrock).
|
|
8
|
+
|
|
9
|
+
This demo is dependency-free: a MockModel stands in for a vision model so you can
|
|
10
|
+
see the plumbing without a key. Swap `model=` for any vision-capable adapter
|
|
11
|
+
(AnthropicModel, OpenAIModel, GeminiModel, …) and it works unchanged.
|
|
12
|
+
|
|
13
|
+
Run:
|
|
14
|
+
python examples/22_multimodal.py
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
|
|
21
|
+
from agentix import (
|
|
22
|
+
Agent,
|
|
23
|
+
DocumentPart,
|
|
24
|
+
ImagePart,
|
|
25
|
+
MockModel,
|
|
26
|
+
ModelResponse,
|
|
27
|
+
TextPart,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# A 1x1 transparent PNG, truncated after the IDAT data (no IDAT CRC, no IEND chunk) — a placeholder for "an image you loaded", not a decodable file.
|
|
31
|
+
_PNG = bytes.fromhex(
|
|
32
|
+
"89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
|
|
33
|
+
"890000000a49444154789c6300010000050001"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
async def main() -> None:
|
|
38
|
+
# Build a multimodal user turn. Parts can come from bytes, a file, a URL,
|
|
39
|
+
# or raw base64 — pick whichever you have:
|
|
40
|
+
user_turn = [
|
|
41
|
+
TextPart("What's in this image, and how does it relate to the attached doc?"),
|
|
42
|
+
ImagePart.from_bytes(_PNG, "image/png"), # inline bytes
|
|
43
|
+
# ImagePart.from_path("diagram.png"), # a local file (mime inferred)
|
|
44
|
+
# ImagePart.from_url("https://example.com/x.jpg"),# a remote URL
|
|
45
|
+
DocumentPart.from_url("https://example.com/spec.pdf", "application/pdf"),
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
agent = Agent(
|
|
49
|
+
model=MockModel([ModelResponse(text="A 1x1 PNG; the doc is a linked PDF spec.")]),
|
|
50
|
+
system_prompt="You are a concise visual assistant.",
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
# The loop accepts the parts list anywhere a string request would go.
|
|
54
|
+
outcome = await agent.run(user_turn)
|
|
55
|
+
print("answer:", outcome.answer)
|
|
56
|
+
|
|
57
|
+
# `.text` gives a string view of any message (media parts contribute nothing).
|
|
58
|
+
from agentix import Message, Role
|
|
59
|
+
|
|
60
|
+
print("text view:", Message(Role.USER, user_turn).text)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# To use a real vision model instead of the mock:
|
|
64
|
+
#
|
|
65
|
+
# from agentix.providers.anthropic import AnthropicModel
|
|
66
|
+
# agent = Agent(model=AnthropicModel(), system_prompt="...")
|
|
67
|
+
# await agent.run([TextPart("Describe this"), ImagePart.from_path("cat.png")])
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
if __name__ == "__main__":
|
|
71
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""23 — Sandboxed execution of untrusted / model-generated code.
|
|
2
|
+
|
|
3
|
+
`LocalToolExecutor` runs tools in-process — great for trusted tools, but it can't
|
|
4
|
+
contain code you don't trust and can't honour `network_allowlist`. `SubprocessExecutor`
|
|
5
|
+
runs each tool as a separate OS process with real limits: no network (Linux
|
|
6
|
+
netns, fail-closed), CPU/memory/file-size/process rlimits, an isolated temp
|
|
7
|
+
working directory, a scrubbed environment, and a hard timeout.
|
|
8
|
+
|
|
9
|
+
Run:
|
|
10
|
+
python examples/23_sandbox.py
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import sys
|
|
17
|
+
|
|
18
|
+
from agentix import Command, SubprocessExecutor
|
|
19
|
+
from agentix.types import ToolCall
|
|
20
|
+
|
|
21
|
+
# A "run this Python" tool: argv runs the interpreter reading code from stdin.
|
|
22
|
+
run_python = {"run_python": Command(argv=[sys.executable, "-"], stdin="code")}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def main() -> None:
|
|
26
|
+
# network_allowlist is what the Agent loop passes from AgentPolicy. Empty =>
|
|
27
|
+
# egress denied (enforced via a network namespace; the call fails closed if
|
|
28
|
+
# the host can't provide one). A non-empty list => network allowed.
|
|
29
|
+
allow_net = ["pypi.org"]
|
|
30
|
+
|
|
31
|
+
executor = SubprocessExecutor(run_python)
|
|
32
|
+
|
|
33
|
+
# 1) Run some code; capture stdout.
|
|
34
|
+
res = await executor(
|
|
35
|
+
ToolCall("run_python", {"code": "print(6 * 7)"}),
|
|
36
|
+
network_allowlist=allow_net,
|
|
37
|
+
)
|
|
38
|
+
print("1) output:", res.content.strip(), "| ok:", res.ok)
|
|
39
|
+
|
|
40
|
+
# 2) Secrets in this process do NOT leak into the child by default.
|
|
41
|
+
import os
|
|
42
|
+
|
|
43
|
+
os.environ["MY_API_KEY"] = "sk-secret"
|
|
44
|
+
peek = "import os; print(os.environ.get('MY_API_KEY', 'HIDDEN'))"
|
|
45
|
+
res = await executor(
|
|
46
|
+
ToolCall("run_python", {"code": peek}),
|
|
47
|
+
network_allowlist=allow_net,
|
|
48
|
+
)
|
|
49
|
+
print("2) child sees the key?:", res.content.strip())
|
|
50
|
+
|
|
51
|
+
# 3) A runaway is killed by the timeout (not the full 30s).
|
|
52
|
+
res = await executor(
|
|
53
|
+
ToolCall("run_python", {"code": "import time; time.sleep(30)"}),
|
|
54
|
+
network_allowlist=allow_net,
|
|
55
|
+
timeout_s=0.5,
|
|
56
|
+
)
|
|
57
|
+
print("3) runaway:", res.content.strip(), "| ok:", res.ok)
|
|
58
|
+
|
|
59
|
+
# 4) The headline guarantee: deny network. With the default policy
|
|
60
|
+
# (require_network_isolation=True), if the host can't isolate the network the
|
|
61
|
+
# tool REFUSES rather than running untrusted code with egress.
|
|
62
|
+
fetch = "import urllib.request; urllib.request.urlopen('http://example.com')"
|
|
63
|
+
res = await executor(
|
|
64
|
+
ToolCall("run_python", {"code": fetch}),
|
|
65
|
+
network_allowlist=[], # deny all egress
|
|
66
|
+
)
|
|
67
|
+
print("4) network-denied result ok?:", res.ok)
|
|
68
|
+
print(" ->", res.content.strip()[:120])
|
|
69
|
+
|
|
70
|
+
# Wiring into an Agent: pass the executor + tool_schemas, and the loop hands
|
|
71
|
+
# it AgentPolicy.network_allowlist / tool_timeout_s automatically:
|
|
72
|
+
#
|
|
73
|
+
# agent = Agent(
|
|
74
|
+
# model=my_model,
|
|
75
|
+
# system_prompt="Use run_python to compute things.",
|
|
76
|
+
# tool_executor=SubprocessExecutor(run_python),
|
|
77
|
+
# tool_schemas=[{"name": "run_python", "description": "...",
|
|
78
|
+
# "parameters": {"type": "object",
|
|
79
|
+
# "properties": {"code": {"type": "string"}},
|
|
80
|
+
# "required": ["code"]}}],
|
|
81
|
+
# policy=AgentPolicy(network_allowlist=[]), # no egress for tools
|
|
82
|
+
# )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
if __name__ == "__main__":
|
|
86
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""24 — Suspendable human-in-the-loop (pause → persist → resume).
|
|
2
|
+
|
|
3
|
+
A web or serverless agent can't block a request coroutine waiting for a human to
|
|
4
|
+
click "approve" minutes later. With `suspend_on_confirm=True`, when a tool needs
|
|
5
|
+
confirmation the loop *checkpoints to the store and returns* `status="suspended"`
|
|
6
|
+
with the pending approvals — the request can end. A later request (even in a new
|
|
7
|
+
process) calls `resume(run_id, decisions=...)` to approve/deny and continue.
|
|
8
|
+
|
|
9
|
+
This demo uses a scripted model and a FileStore, and resumes on a brand-new
|
|
10
|
+
Agent instance to prove the paused state lives entirely in the store.
|
|
11
|
+
|
|
12
|
+
Run:
|
|
13
|
+
python examples/24_suspend_resume.py
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import tempfile
|
|
20
|
+
from collections.abc import Sequence
|
|
21
|
+
|
|
22
|
+
from agentix import (
|
|
23
|
+
Agent,
|
|
24
|
+
AgentPolicy,
|
|
25
|
+
FileStore,
|
|
26
|
+
MockModel,
|
|
27
|
+
ModelResponse,
|
|
28
|
+
Role,
|
|
29
|
+
TierGuard,
|
|
30
|
+
ToolCall,
|
|
31
|
+
tool,
|
|
32
|
+
)
|
|
33
|
+
from agentix.types import Message
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@tool
|
|
37
|
+
def wire_funds(to: str, amount: int) -> str:
|
|
38
|
+
"""Wire money to an account."""
|
|
39
|
+
return f"wired ${amount} to {to}"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def model(messages: Sequence[Message]) -> ModelResponse:
|
|
43
|
+
# Stateless, like a real model: answer once the tool result is present.
|
|
44
|
+
if any(m.role is Role.TOOL for m in messages):
|
|
45
|
+
return ModelResponse(text="Done — the transfer is complete.")
|
|
46
|
+
call = ToolCall("wire_funds", {"to": "acct-42", "amount": 9000}, id="c1")
|
|
47
|
+
return ModelResponse(tool_calls=[call])
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def build_agent(store: FileStore) -> Agent:
|
|
51
|
+
return Agent(
|
|
52
|
+
model=MockModel(model),
|
|
53
|
+
system_prompt="You are a finance assistant.",
|
|
54
|
+
tools=[wire_funds],
|
|
55
|
+
guards=[TierGuard()],
|
|
56
|
+
policy=AgentPolicy(confirm_first={"wire_funds"}), # gate the risky tool
|
|
57
|
+
store=store,
|
|
58
|
+
suspend_on_confirm=True,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def main() -> None:
|
|
63
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
64
|
+
store = FileStore(tmp)
|
|
65
|
+
|
|
66
|
+
# --- Request 1: start the run; it pauses for approval and returns. ---
|
|
67
|
+
first = await build_agent(store).run("Pay invoice 42", run_id="run-1")
|
|
68
|
+
print("after request 1:", first.status)
|
|
69
|
+
for p in first.pending:
|
|
70
|
+
c = p.call
|
|
71
|
+
print(f" awaiting approval: {c.name}({c.args}) — {p.reason}")
|
|
72
|
+
|
|
73
|
+
# ... the HTTP handler returns here; nothing is blocked. The human
|
|
74
|
+
# reviews and approves out-of-band. Later, a *new* process handles the
|
|
75
|
+
# approval callback: ---
|
|
76
|
+
|
|
77
|
+
# --- Request 2: a fresh Agent (same store) resumes with the decision. ---
|
|
78
|
+
approved = await build_agent(store).resume("run-1", decisions={"c1": True})
|
|
79
|
+
print("after request 2:", approved.status, "->", approved.answer)
|
|
80
|
+
|
|
81
|
+
# Denying instead would have declined the tool and still completed:
|
|
82
|
+
denied = await build_agent(store).resume("run-1", decisions={"c1": False})
|
|
83
|
+
print("if denied:", denied.status, "->", denied.answer)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
asyncio.run(main())
|
|
@@ -30,6 +30,11 @@ uv run python examples/01_hello_agent.py
|
|
|
30
30
|
| `17_eval.py` | Eval harness: score an agent over golden cases, gate CI on pass rate. | — |
|
|
31
31
|
| `18_verification.py` | Self-consistency (`SelfConsistencyModel`) + LLM-as-judge (`JudgeGuard`). | — |
|
|
32
32
|
| `19_tracing.py` | OpenTelemetry tracing: model/tool/run spans. | `agentix[otel]` + `opentelemetry-sdk` |
|
|
33
|
+
| `20_prompts.py` | Prompt registry/versioning + typed Anthropic reasoning knobs. | — |
|
|
34
|
+
| `21_providers.py` | Provider gallery: OpenAI / Gemini / Bedrock / Ollama / LiteLLM, one-line swap. | — (per-provider extra to run live) |
|
|
35
|
+
| `22_multimodal.py` | Multimodal input: text + image / PDF / audio parts via `TextPart`/`ImagePart`/`DocumentPart`/`AudioPart`. | — |
|
|
36
|
+
| `23_sandbox.py` | `SubprocessExecutor`: run untrusted/model-generated code in an isolated subprocess (no network, rlimits, timeout). | — (POSIX) |
|
|
37
|
+
| `24_suspend_resume.py` | Durable human-in-the-loop: `suspend_on_confirm` pauses for approval, persists, and `resume(decisions=…)` continues (even in a new process). | — |
|
|
33
38
|
|
|
34
39
|
To run the Anthropic example:
|
|
35
40
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "agentix-toolkit"
|
|
7
|
-
version = "0.2.1"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "A generic, batteries-included agent toolkit: configure the loop, tools, guards, and observability instead of rewriting them."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -27,6 +27,11 @@ dependencies = []
|
|
|
27
27
|
# Runtime extras (installed on demand): the optional provider/integration deps.
|
|
28
28
|
[project.optional-dependencies]
|
|
29
29
|
anthropic = ["anthropic>=0.40"]
|
|
30
|
+
openai = ["openai>=1.0"]
|
|
31
|
+
gemini = ["google-genai>=0.3"]
|
|
32
|
+
bedrock = ["boto3>=1.34"]
|
|
33
|
+
ollama = ["ollama>=0.3"]
|
|
34
|
+
litellm = ["litellm>=1.40"]
|
|
30
35
|
mcp = ["mcp>=1.0"]
|
|
31
36
|
otel = ["opentelemetry-api>=1.20"]
|
|
32
37
|
|
|
@@ -65,5 +70,12 @@ files = ["src"]
|
|
|
65
70
|
|
|
66
71
|
# Optional, lazily-imported deps — don't require their stubs to type-check.
|
|
67
72
|
[[tool.mypy.overrides]]
|
|
68
|
-
module = ["opentelemetry.*"]
|
|
73
|
+
module = [
|
|
74
|
+
"opentelemetry.*",
|
|
75
|
+
"openai.*",
|
|
76
|
+
"google.*",
|
|
77
|
+
"boto3.*",
|
|
78
|
+
"ollama.*",
|
|
79
|
+
"litellm.*",
|
|
80
|
+
]
|
|
69
81
|
ignore_missing_imports = true
|