agentix-toolkit 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. agentix_toolkit-0.2.0/.editorconfig +18 -0
  2. agentix_toolkit-0.2.0/.github/ISSUE_TEMPLATE/bug_report.yml +43 -0
  3. agentix_toolkit-0.2.0/.github/ISSUE_TEMPLATE/config.yml +8 -0
  4. agentix_toolkit-0.2.0/.github/ISSUE_TEMPLATE/feature_request.yml +32 -0
  5. agentix_toolkit-0.2.0/.github/PULL_REQUEST_TEMPLATE.md +21 -0
  6. agentix_toolkit-0.2.0/.github/dependabot.yml +20 -0
  7. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.github/workflows/ci.yml +4 -4
  8. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.github/workflows/release.yml +4 -4
  9. agentix_toolkit-0.2.0/.pre-commit-config.yaml +27 -0
  10. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/CHANGELOG.md +34 -1
  11. agentix_toolkit-0.2.0/CODE_OF_CONDUCT.md +64 -0
  12. agentix_toolkit-0.2.0/CONTRIBUTING.md +67 -0
  13. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/PKG-INFO +57 -9
  14. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/PLAN.md +20 -2
  15. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/README.md +54 -8
  16. agentix_toolkit-0.2.0/SECURITY.md +46 -0
  17. agentix_toolkit-0.2.0/examples/15_permissions.py +91 -0
  18. agentix_toolkit-0.2.0/examples/16_reliability.py +72 -0
  19. agentix_toolkit-0.2.0/examples/17_eval.py +57 -0
  20. agentix_toolkit-0.2.0/examples/18_verification.py +72 -0
  21. agentix_toolkit-0.2.0/examples/19_tracing.py +77 -0
  22. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/README.md +5 -0
  23. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/pyproject.toml +7 -1
  24. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/__init__.py +44 -0
  25. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/agent.py +58 -0
  26. agentix_toolkit-0.2.0/src/agentix/consistency.py +76 -0
  27. agentix_toolkit-0.2.0/src/agentix/evals.py +262 -0
  28. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/__init__.py +12 -0
  29. agentix_toolkit-0.2.0/src/agentix/guards/judge.py +53 -0
  30. agentix_toolkit-0.2.0/src/agentix/guards/permissions.py +81 -0
  31. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/providers/anthropic.py +13 -8
  32. agentix_toolkit-0.2.0/src/agentix/resilience.py +95 -0
  33. agentix_toolkit-0.2.0/src/agentix/tracing.py +122 -0
  34. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/types.py +1 -0
  35. agentix_toolkit-0.2.0/src/agentix/validation.py +57 -0
  36. agentix_toolkit-0.2.0/tests/test_consistency.py +78 -0
  37. agentix_toolkit-0.2.0/tests/test_evals.py +168 -0
  38. agentix_toolkit-0.2.0/tests/test_judge.py +61 -0
  39. agentix_toolkit-0.2.0/tests/test_permissions.py +187 -0
  40. agentix_toolkit-0.2.0/tests/test_resilience.py +105 -0
  41. agentix_toolkit-0.2.0/tests/test_tracing.py +195 -0
  42. agentix_toolkit-0.2.0/tests/test_validation.py +149 -0
  43. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/uv.lock +19 -3
  44. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.gitignore +0 -0
  45. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/.python-version +0 -0
  46. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/LICENSE +0 -0
  47. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/RELEASING.md +0 -0
  48. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/01_hello_agent.py +0 -0
  49. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/02_tool_use.py +0 -0
  50. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/03_async_dynamic_loop.py +0 -0
  51. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/04_policy_and_trust.py +0 -0
  52. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/05_anthropic_model.py +0 -0
  53. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/06_tool_decorator.py +0 -0
  54. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/07_guards.py +0 -0
  55. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/08_persistence.py +0 -0
  56. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/09_streaming.py +0 -0
  57. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/10_concurrency.py +0 -0
  58. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/11_mcp.py +0 -0
  59. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/12_context.py +0 -0
  60. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/13_subagents.py +0 -0
  61. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/examples/14_cost_and_interrupt.py +0 -0
  62. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/concurrency.py +0 -0
  63. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/confirm.py +0 -0
  64. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/context.py +0 -0
  65. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/control.py +0 -0
  66. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/errors.py +0 -0
  67. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/events.py +0 -0
  68. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/executors.py +0 -0
  69. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/base.py +0 -0
  70. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/injection.py +0 -0
  71. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/pii.py +0 -0
  72. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/tiers.py +0 -0
  73. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/guards/trust.py +0 -0
  74. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/mcp.py +0 -0
  75. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/model.py +0 -0
  76. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/policy.py +0 -0
  77. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/pricing.py +0 -0
  78. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/providers/__init__.py +0 -0
  79. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/providers/mock.py +0 -0
  80. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/py.typed +0 -0
  81. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/serde.py +0 -0
  82. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/store.py +0 -0
  83. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/streaming.py +0 -0
  84. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/subagents.py +0 -0
  85. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/src/agentix/tools.py +0 -0
  86. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_agent.py +0 -0
  87. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_anthropic_adapter.py +0 -0
  88. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_concurrency.py +0 -0
  89. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_context.py +0 -0
  90. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_cost_and_interrupt.py +0 -0
  91. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_guards.py +0 -0
  92. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_mcp.py +0 -0
  93. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_persistence.py +0 -0
  94. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_streaming.py +0 -0
  95. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_subagents.py +0 -0
  96. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_tools.py +0 -0
  97. {agentix_toolkit-0.1.0 → agentix_toolkit-0.2.0}/tests/test_types.py +0 -0
@@ -0,0 +1,18 @@
1
+ root = true
2
+
3
+ [*]
4
+ charset = utf-8
5
+ end_of_line = lf
6
+ insert_final_newline = true
7
+ trim_trailing_whitespace = true
8
+ indent_style = space
9
+
10
+ [*.py]
11
+ indent_size = 4
12
+ max_line_length = 100
13
+
14
+ [*.{yml,yaml,toml,json}]
15
+ indent_size = 2
16
+
17
+ [*.md]
18
+ trim_trailing_whitespace = false
@@ -0,0 +1,43 @@
1
+ name: Bug report
2
+ description: Something isn't working as documented.
3
+ labels: ["bug"]
4
+ body:
5
+ - type: markdown
6
+ attributes:
7
+ value: |
8
+ Thanks for the report! For **security vulnerabilities**, do not file a
9
+ public issue — see [SECURITY.md](../blob/main/SECURITY.md).
10
+ - type: textarea
11
+ id: what-happened
12
+ attributes:
13
+ label: What happened?
14
+ description: What did you expect, and what happened instead?
15
+ validations:
16
+ required: true
17
+ - type: textarea
18
+ id: repro
19
+ attributes:
20
+ label: Minimal reproduction
21
+ description: The smallest code snippet that reproduces it. Prefer `MockModel` so it runs without an API key.
22
+ render: python
23
+ validations:
24
+ required: true
25
+ - type: input
26
+ id: version
27
+ attributes:
28
+ label: agentix-toolkit version
29
+ placeholder: "0.1.0"
30
+ validations:
31
+ required: true
32
+ - type: input
33
+ id: python
34
+ attributes:
35
+ label: Python version
36
+ placeholder: "3.12"
37
+ validations:
38
+ required: true
39
+ - type: textarea
40
+ id: extra
41
+ attributes:
42
+ label: Anything else?
43
+ description: Traceback, environment, relevant config (model/provider, guards, etc.).
@@ -0,0 +1,8 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Security vulnerability
4
+ url: https://github.com/skwijeratne/agentix-toolkit/security/advisories/new
5
+ about: Report security issues privately — please do not open a public issue.
6
+ - name: Question / usage help
7
+ url: https://github.com/skwijeratne/agentix-toolkit/discussions
8
+ about: Ask questions and discuss ideas (if Discussions is enabled).
@@ -0,0 +1,32 @@
1
+ name: Feature request
2
+ description: Suggest a capability or improvement.
3
+ labels: ["enhancement"]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: What problem does this solve?
9
+ description: The use case or pain point. What are you trying to build?
10
+ validations:
11
+ required: true
12
+ - type: textarea
13
+ id: proposal
14
+ attributes:
15
+ label: Proposed solution
16
+ description: |
17
+ What would the API look like? Remember agentix's design: small shared
18
+ core, with capabilities injected (a guard / tool / strategy / adapter)
19
+ rather than baked into the loop.
20
+ validations:
21
+ required: true
22
+ - type: textarea
23
+ id: alternatives
24
+ attributes:
25
+ label: Alternatives considered
26
+ - type: checkboxes
27
+ id: scope
28
+ attributes:
29
+ label: Fit
30
+ options:
31
+ - label: This keeps the core provider-agnostic (no coupling to one model vendor).
32
+ - label: I'm willing to help implement it.
@@ -0,0 +1,21 @@
1
+ <!-- Thanks for contributing! Keep PRs focused. -->
2
+
3
+ ## What & why
4
+
5
+ <!-- What does this change, and what problem does it solve? Link the issue. -->
6
+
7
+ Closes #
8
+
9
+ ## Checklist
10
+
11
+ - [ ] Tests added/updated (new behavior covered; bug fixes have a regression test)
12
+ - [ ] `uv run pytest` passes
13
+ - [ ] `uv run ruff check src tests` passes
14
+ - [ ] `uv run mypy` passes
15
+ - [ ] Docs updated where relevant (docstrings / README / an `examples/` script)
16
+ - [ ] `CHANGELOG.md` updated under `[Unreleased]`
17
+ - [ ] Change is opt-in / composable and keeps the core provider-agnostic
18
+
19
+ ## Notes for reviewers
20
+
21
+ <!-- Anything that needs context: design tradeoffs, follow-ups, etc. -->
@@ -0,0 +1,20 @@
1
+ version: 2
2
+ updates:
3
+ # Python dependencies (resolved via uv.lock).
4
+ - package-ecosystem: "uv"
5
+ directory: "/"
6
+ schedule:
7
+ interval: "weekly"
8
+ open-pull-requests-limit: 5
9
+ groups:
10
+ python-deps:
11
+ patterns: ["*"]
12
+
13
+ # Keep GitHub Actions current (also resolves the Node-version action drift).
14
+ - package-ecosystem: "github-actions"
15
+ directory: "/"
16
+ schedule:
17
+ interval: "weekly"
18
+ groups:
19
+ actions:
20
+ patterns: ["*"]
@@ -14,9 +14,9 @@ jobs:
14
14
  matrix:
15
15
  python-version: ["3.10", "3.11", "3.12", "3.13"]
16
16
  steps:
17
- - uses: actions/checkout@v4
17
+ - uses: actions/checkout@v7
18
18
  - name: Install uv
19
- uses: astral-sh/setup-uv@v6
19
+ uses: astral-sh/setup-uv@v7
20
20
  with:
21
21
  enable-cache: true
22
22
  - run: uv python install ${{ matrix.python-version }}
@@ -27,9 +27,9 @@ jobs:
27
27
  name: lint & types
28
28
  runs-on: ubuntu-latest
29
29
  steps:
30
- - uses: actions/checkout@v4
30
+ - uses: actions/checkout@v7
31
31
  - name: Install uv
32
- uses: astral-sh/setup-uv@v6
32
+ uses: astral-sh/setup-uv@v7
33
33
  with:
34
34
  enable-cache: true
35
35
  # --all-extras so mypy can resolve the optional anthropic/mcp imports.
@@ -12,13 +12,13 @@ jobs:
12
12
  build:
13
13
  runs-on: ubuntu-latest
14
14
  steps:
15
- - uses: actions/checkout@v4
15
+ - uses: actions/checkout@v7
16
16
  - name: Install uv
17
- uses: astral-sh/setup-uv@v6
17
+ uses: astral-sh/setup-uv@v7
18
18
  - run: uv build
19
19
  - name: Check the built artifacts
20
20
  run: uvx twine check dist/*
21
- - uses: actions/upload-artifact@v4
21
+ - uses: actions/upload-artifact@v7
22
22
  with:
23
23
  name: dist
24
24
  path: dist/
@@ -30,7 +30,7 @@ jobs:
30
30
  permissions:
31
31
  id-token: write # required for Trusted Publishing
32
32
  steps:
33
- - uses: actions/download-artifact@v4
33
+ - uses: actions/download-artifact@v8
34
34
  with:
35
35
  name: dist
36
36
  path: dist/
@@ -0,0 +1,27 @@
1
+ # Run `uv run pre-commit install` once to enable. Mirrors the CI gates.
2
+ repos:
3
+ - repo: https://github.com/pre-commit/pre-commit-hooks
4
+ rev: v5.0.0
5
+ hooks:
6
+ - id: trailing-whitespace
7
+ - id: end-of-file-fixer
8
+ - id: check-yaml
9
+ - id: check-toml
10
+ - id: check-merge-conflict
11
+ - id: check-added-large-files
12
+
13
+ - repo: https://github.com/astral-sh/ruff-pre-commit
14
+ rev: v0.15.18
15
+ hooks:
16
+ - id: ruff
17
+ args: [--fix]
18
+
19
+ # mypy --strict, matching CI. Uses the project's own env via uv.
20
+ - repo: local
21
+ hooks:
22
+ - id: mypy
23
+ name: mypy (strict)
24
+ entry: uv run mypy
25
+ language: system
26
+ types: [python]
27
+ pass_filenames: false
@@ -6,6 +6,38 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.2.0] - 2026-06-23
10
+
11
+ ### Added
12
+ - Subagents: `subagent_tool(agent, ...)` exposes a child agent as a delegable
13
+ tool (its own model/system prompt/tools/guards); composes with the loop and
14
+ `bounded_gather`.
15
+ - Cost & control: USD cost tracking (`pricing` module with `cost_usd`, plus
16
+ `cost_usd` on `ModelResponse`/`AgentOutcome`; the Anthropic adapter fills
17
+ `input_tokens`/`output_tokens`/`cost_usd`); `AgentPolicy.max_budget_usd`; and
18
+ `Interrupt` to stop a run/stream at a safe boundary.
19
+ - Dynamic permissions: `CallbackGuard` (a `can_use_tool`-style per-call callback
20
+ returning allow/deny/confirm) and `ToolAllowlistGuard` (scope a run to a
21
+ subset of tools).
22
+ - Output validation + retry: `Agent(output_validator=, max_output_retries=)`
23
+ re-prompts on a failed validation and exposes `AgentOutcome.parsed`. Ships
24
+ `json_output`, `pydantic_output`, `regex_output`.
25
+ - Resilient model wrappers: `RetryModel` (backoff) and `FallbackModel`
26
+ (try-next-on-error), composable and drop-in.
27
+ - Eval harness (`agentix.evals`): `evaluate(...)` runs an agent over `Case`s and
28
+ returns an `EvalReport` with `pass_rate` / `format_success_rate` /
29
+ `assert_pass_rate()` (gate CI on regressions). Scorers: `exact_match`,
30
+ `contains`, `regex_match`, `predicate`, `llm_judge`.
31
+ - `SelfConsistencyModel`: sample a model N times per turn and return the majority
32
+ vote (drop-in `ModelFn`).
33
+ - `JudgeGuard`: an LLM reviews the final answer against a rubric and replaces it
34
+ on failure (an `on_answer` safety/on-brand/format gate).
35
+ - Anthropic adapter: structured-output passthrough documented
36
+ (`output_config={"format": ...}`) and `strict` tool schemas forwarded.
37
+ - OpenTelemetry tracing (`agentix-toolkit[otel]`): `TracingModel`, `tracing_events`, and
38
+ `trace_run` produce a span tree (run → model/tool spans) for your observability
39
+ stack.
40
+
9
41
  ## [0.1.0] - 2026-06-22
10
42
 
11
43
  Initial release.
@@ -43,5 +75,6 @@ Initial release.
43
75
  `cost_usd`; `AgentPolicy.max_budget_usd` aborts a run over budget.
44
76
  - `Interrupt` stops a run or stream at the next safe boundary.
45
77
 
46
- [Unreleased]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.1.0...HEAD
78
+ [Unreleased]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.2.0...HEAD
79
+ [0.2.0]: https://github.com/skwijeratne/agentix-toolkit/compare/v0.1.0...v0.2.0
47
80
  [0.1.0]: https://github.com/skwijeratne/agentix-toolkit/releases/tag/v0.1.0
@@ -0,0 +1,64 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment include:
18
+
19
+ - Demonstrating empathy and kindness toward other people
20
+ - Being respectful of differing opinions, viewpoints, and experiences
21
+ - Giving and gracefully accepting constructive feedback
22
+ - Accepting responsibility and apologizing to those affected by our mistakes,
23
+ and learning from the experience
24
+ - Focusing on what is best not just for us as individuals, but for the overall
25
+ community
26
+
27
+ Examples of unacceptable behavior include:
28
+
29
+ - The use of sexualized language or imagery, and sexual attention or advances of
30
+ any kind
31
+ - Trolling, insulting or derogatory comments, and personal or political attacks
32
+ - Public or private harassment
33
+ - Publishing others' private information, such as a physical or email address,
34
+ without their explicit permission
35
+ - Other conduct which could reasonably be considered inappropriate in a
36
+ professional setting
37
+
38
+ ## Enforcement Responsibilities
39
+
40
+ Community leaders are responsible for clarifying and enforcing our standards of
41
+ acceptable behavior and will take appropriate and fair corrective action in
42
+ response to any behavior that they deem inappropriate, threatening, offensive,
43
+ or harmful.
44
+
45
+ ## Scope
46
+
47
+ This Code of Conduct applies within all community spaces, and also applies when
48
+ an individual is officially representing the community in public spaces.
49
+
50
+ ## Enforcement
51
+
52
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
53
+ reported to the community leaders responsible for enforcement at
54
+ **skwijeratne@gmail.com**. All complaints will be reviewed and investigated
55
+ promptly and fairly. Community leaders are obligated to respect the privacy and
56
+ security of the reporter of any incident.
57
+
58
+ ## Attribution
59
+
60
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
61
+ version 2.1, available at
62
+ https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
63
+
64
+ [homepage]: https://www.contributor-covenant.org
@@ -0,0 +1,67 @@
1
+ # Contributing to agentix
2
+
3
+ Thanks for your interest in improving agentix! This guide gets you set up and
4
+ explains what we look for in a contribution.
5
+
6
+ > The distribution is **`agentix-toolkit`** on PyPI; you import it as **`agentix`**.
7
+
8
+ ## Development setup
9
+
10
+ This project uses [uv](https://docs.astral.sh/uv/).
11
+
12
+ ```bash
13
+ git clone https://github.com/skwijeratne/agentix-toolkit
14
+ cd agentix-toolkit
15
+ uv sync --all-extras # create the venv, install deps + dev tools + extras
16
+ ```
17
+
18
+ ## The checks (all three must pass)
19
+
20
+ CI runs these on every PR (tests across Python 3.10–3.13), and they are **blocking**:
21
+
22
+ ```bash
23
+ uv run pytest # tests
24
+ uv run ruff check src tests # lint
25
+ uv run mypy # type-check (strict)
26
+ ```
27
+
28
+ Run them locally before pushing. Optionally enable the pre-commit hooks so
29
+ lint and type checks run automatically on commit:
30
+
31
+ ```bash
32
+ uv run pre-commit install
33
+ ```
34
+
35
+ ## Making a change
36
+
37
+ 1. **Open an issue first** for anything non-trivial, so we can agree on the
38
+ approach before you invest time.
39
+ 2. Branch off `main`.
40
+ 3. Keep the change focused. Match the surrounding style — small, shared core;
41
+ load-bearing behavior is injected and configurable, not baked into the loop.
42
+ 4. **Add tests.** New behavior needs coverage; bug fixes need a regression test.
43
+ Tests are plain `def` / `async def test_*` functions (pytest, `asyncio_mode`
44
+ is `auto`).
45
+ 5. Update docs where relevant: docstrings, the README, an `examples/` script,
46
+ and a `CHANGELOG.md` entry under `[Unreleased]`.
47
+ 6. Make sure all three checks pass.
48
+ 7. Open a PR using the template; describe the change and link the issue.
49
+
50
+ ## Design principles
51
+
52
+ - **Provider-agnostic core.** Don't couple the loop to a specific model
53
+ provider; provider code lives behind adapters (`providers/`).
54
+ - **Inject, don't bake in.** New capabilities should be opt-in and composable
55
+ (a guard, a tool, a strategy, an executor), not hard-coded into `agent.py`.
56
+ - **Security defaults are conservative.** When a guard is ambiguous, fail
57
+ closed. See `SECURITY.md`.
58
+ - **Typed and tested.** Public APIs are typed (`mypy --strict`) and exercised by
59
+ tests.
60
+
61
+ ## Reporting bugs / requesting features
62
+
63
+ Use the issue templates. For **security vulnerabilities, do not open a public
64
+ issue** — see [`SECURITY.md`](./SECURITY.md).
65
+
66
+ By contributing, you agree that your contributions are licensed under the
67
+ project's [MIT License](./LICENSE).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentix-toolkit
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A generic, batteries-included agent toolkit: configure the loop, tools, guards, and observability instead of rewriting them.
5
5
  Project-URL: Homepage, https://github.com/skwijeratne/agentix-toolkit
6
6
  Project-URL: Repository, https://github.com/skwijeratne/agentix-toolkit
@@ -22,6 +22,8 @@ Provides-Extra: anthropic
22
22
  Requires-Dist: anthropic>=0.40; extra == 'anthropic'
23
23
  Provides-Extra: mcp
24
24
  Requires-Dist: mcp>=1.0; extra == 'mcp'
25
+ Provides-Extra: otel
26
+ Requires-Dist: opentelemetry-api>=1.20; extra == 'otel'
25
27
  Description-Content-Type: text/markdown
26
28
 
27
29
  # agentix
@@ -53,11 +55,17 @@ outcome = await agent.run("What's the weather in Lisbon?")
53
55
 
54
56
  - **Async-first** core loop (`run` / `stream` / `resume`) with a sync wrapper.
55
57
  - **Provider-agnostic** — bring any model; a real **Anthropic** adapter is included.
56
- - **Tools from type hints** — one `@tool` decorator generates the JSON schema.
57
- - **Security as a first-class, opt-in subsystem** trust boundary, permission
58
- tiers, confirmation, PII/injection guards, audit events.
59
- - **Scales** — streaming, checkpoint/resume, MCP tools, context trimming, and
60
- fleet backpressure.
58
+ - **Tools from type hints** — one `@tool` decorator generates the JSON schema;
59
+ **MCP** servers and **subagents** plug in as tools too.
60
+ - **Security, opt-in** — trust boundary, permission tiers + dynamic
61
+ `can_use_tool` callbacks, PII/injection guards, human confirmation, audit events.
62
+ - **Cost & control** — token **and USD** cost tracking, step/token/USD budgets,
63
+ cooperative `Interrupt`.
64
+ - **Reliability** — output **validation + retry** (`outcome.parsed`), model
65
+ **fallback/retry**, self-consistency, and LLM-as-judge.
66
+ - **Scale & ops** — streaming, checkpoint/resume, context trimming, fleet
67
+ backpressure, an **eval harness** (gate CI on quality), and **OpenTelemetry**
68
+ tracing.
61
69
 
62
70
  > Status: **alpha**, under active development. APIs may change before `1.0`.
63
71
 
@@ -72,9 +80,9 @@ The distribution is **`agentix-toolkit`**; you import it as **`agentix`**.
72
80
  With [uv](https://docs.astral.sh/uv/) (recommended):
73
81
 
74
82
  ```bash
75
- uv add agentix-toolkit # core
76
- uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
77
- uv add "agentix-toolkit[anthropic,mcp]" # + MCP client support
83
+ uv add agentix-toolkit # core (no required deps)
84
+ uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
85
+ uv add "agentix-toolkit[anthropic,mcp,otel]" # + MCP client + OpenTelemetry tracing
78
86
  ```
79
87
 
80
88
  Or with pip:
@@ -83,6 +91,9 @@ Or with pip:
83
91
  pip install "agentix-toolkit[anthropic]"
84
92
  ```
85
93
 
94
+ Extras are opt-in: `anthropic` (the model adapter), `mcp` (MCP client),
95
+ `otel` (OpenTelemetry tracing). The core has **no required dependencies**.
96
+
86
97
  ### 2. Run an agent with no API key
87
98
 
88
99
  `MockModel` is a scripted, dependency-free model — perfect for trying the loop
@@ -169,6 +180,30 @@ async for event in agent.stream("Tell me about Lisbon."):
169
180
  print("\n", event.outcome.status)
170
181
  ```
171
182
 
183
+ ### 6. Make it production-safe (validate output, fall back, cap cost)
184
+
185
+ Stop malformed output from crashing downstream code: validate the final answer
186
+ and re-prompt on failure. Add a fallback model and a USD budget for resilience.
187
+
188
+ ```python
189
+ from agentix import Agent, AgentPolicy, FallbackModel, json_output
190
+
191
+ agent = Agent(
192
+ model=FallbackModel([primary_model, backup_model]), # survive a provider blip
193
+ system_prompt="Reply with a JSON object.",
194
+ tools=[...],
195
+ output_validator=json_output, # or pydantic_output(MyModel)
196
+ max_output_retries=2, # re-prompt the model on bad output
197
+ policy=AgentPolicy(max_budget_usd=0.50), # abort if it gets expensive
198
+ )
199
+ outcome = await agent.run("...")
200
+ outcome.parsed # a validated object — safe to use; outcome.cost_usd is tracked
201
+ ```
202
+
203
+ Then **gate quality in CI** with the eval harness — `evaluate(...)` runs your
204
+ agent over golden cases and `assert_pass_rate(...)` fails the build on a
205
+ regression (see `examples/17_eval.py`).
206
+
172
207
  ---
173
208
 
174
209
  ## Feature tour
@@ -184,6 +219,13 @@ Each links to a runnable example in [`examples/`](./examples):
184
219
  | Concurrency | `Limiter` + `bounded_gather` for fleets | `10_concurrency.py` |
185
220
  | MCP | use any MCP server's tools | `11_mcp.py` |
186
221
  | Context | bound the transcript (`TrimRounds`, …) | `12_context.py` |
222
+ | Subagents | delegate a subtask to a child agent | `13_subagents.py` |
223
+ | Cost & interrupt | USD budgets + stop a run mid-flight | `14_cost_and_interrupt.py` |
224
+ | Permissions | dynamic `can_use_tool` + tool allowlist | `15_permissions.py` |
225
+ | Reliability | output validation + retry, fallback/retry models | `16_reliability.py` |
226
+ | Eval | score golden cases, gate CI on pass rate | `17_eval.py` |
227
+ | Verify | self-consistency + LLM-as-judge | `18_verification.py` |
228
+ | Tracing | OpenTelemetry model/tool/run spans | `19_tracing.py` |
187
229
 
188
230
  ---
189
231
 
@@ -202,6 +244,12 @@ Run an example: `uv run python examples/01_hello_agent.py`.
202
244
  See [`RELEASING.md`](./RELEASING.md) for the publish process and
203
245
  [`PLAN.md`](./PLAN.md) for the roadmap.
204
246
 
247
+ ## Contributing
248
+
249
+ Contributions are welcome! See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for setup
250
+ and the PR checklist, [`CODE_OF_CONDUCT.md`](./CODE_OF_CONDUCT.md), and
251
+ [`SECURITY.md`](./SECURITY.md) for reporting vulnerabilities privately.
252
+
205
253
  ## License
206
254
 
207
255
  MIT — see [`LICENSE`](./LICENSE).
@@ -109,8 +109,26 @@ agentix/
109
109
  - **P10 — Cost + interrupt.** ✅ `pricing` (per-model table + `cost_usd`);
110
110
  `ModelResponse`/`AgentOutcome` carry `cost_usd` (Anthropic adapter fills it);
111
111
  `AgentPolicy.max_budget_usd` aborts; `Interrupt` stops a run/stream at a safe
112
- boundary. Tests + example 14. (P8 — permission callbacks — still open; see
113
- `PLAN.gaps.md`.)
112
+ boundary. Tests + example 14.
113
+ - **P8 — Dynamic permissions.** ✅ `CallbackGuard(check)` (`can_use_tool`: a
114
+ per-call callback returning allow/deny/confirm or a bool) and
115
+ `ToolAllowlistGuard` (scope a run to a tool subset). Compose with the guard
116
+ pipeline. Tests + example 15.
117
+ - **P11 — Reliability & correctness.** ✅ Output validation + retry
118
+ (`Agent(output_validator=, max_output_retries=)` → `outcome.parsed`;
119
+ `json_output`/`pydantic_output`/`regex_output`); resilient model wrappers
120
+ (`RetryModel`, `FallbackModel`); `SelfConsistencyModel` (majority vote);
121
+ `JudgeGuard` (LLM answer gate); structured-output passthrough on the Anthropic
122
+ adapter. Examples 16 + 18.
123
+ - **P12 — Eval harness.** ✅ `agentix.evals`: `evaluate(dataset, agent, scorer=)`
124
+ → `EvalReport` (`pass_rate`, `format_success_rate`, `assert_pass_rate()` to
125
+ gate CI). Scorers: `exact_match`/`contains`/`regex_match`/`predicate`/
126
+ `llm_judge`. Tests + example 17.
127
+ - **P13 — OpenTelemetry tracing.** ✅ `agentix.tracing` (`agentix-toolkit[otel]`):
128
+ `TracingModel` (model spans), `tracing_events()` (tool spans + guard/confirm),
129
+ `trace_run()` (root span). Tests + example 19 (verified vs the real OTel SDK).
130
+ Roadmap remainders (prompt versioning, citation guard, eval loaders) are tracked in
131
+ `PLAN.gaps.md`.
114
132
 
115
133
  > ⚠️ Streaming caveat: `on_answer` egress guards (PII redaction) can't un-send
116
134
  > already-streamed deltas — deltas are raw; `Done.outcome.answer` is redacted.
@@ -27,11 +27,17 @@ outcome = await agent.run("What's the weather in Lisbon?")
27
27
 
28
28
  - **Async-first** core loop (`run` / `stream` / `resume`) with a sync wrapper.
29
29
  - **Provider-agnostic** — bring any model; a real **Anthropic** adapter is included.
30
- - **Tools from type hints** — one `@tool` decorator generates the JSON schema.
31
- - **Security as a first-class, opt-in subsystem** trust boundary, permission
32
- tiers, confirmation, PII/injection guards, audit events.
33
- - **Scales** — streaming, checkpoint/resume, MCP tools, context trimming, and
34
- fleet backpressure.
30
+ - **Tools from type hints** — one `@tool` decorator generates the JSON schema;
31
+ **MCP** servers and **subagents** plug in as tools too.
32
+ - **Security, opt-in** — trust boundary, permission tiers + dynamic
33
+ `can_use_tool` callbacks, PII/injection guards, human confirmation, audit events.
34
+ - **Cost & control** — token **and USD** cost tracking, step/token/USD budgets,
35
+ cooperative `Interrupt`.
36
+ - **Reliability** — output **validation + retry** (`outcome.parsed`), model
37
+ **fallback/retry**, self-consistency, and LLM-as-judge.
38
+ - **Scale & ops** — streaming, checkpoint/resume, context trimming, fleet
39
+ backpressure, an **eval harness** (gate CI on quality), and **OpenTelemetry**
40
+ tracing.
35
41
 
36
42
  > Status: **alpha**, under active development. APIs may change before `1.0`.
37
43
 
@@ -46,9 +52,9 @@ The distribution is **`agentix-toolkit`**; you import it as **`agentix`**.
46
52
  With [uv](https://docs.astral.sh/uv/) (recommended):
47
53
 
48
54
  ```bash
49
- uv add agentix-toolkit # core
50
- uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
51
- uv add "agentix-toolkit[anthropic,mcp]" # + MCP client support
55
+ uv add agentix-toolkit # core (no required deps)
56
+ uv add "agentix-toolkit[anthropic]" # + Anthropic adapter
57
+ uv add "agentix-toolkit[anthropic,mcp,otel]" # + MCP client + OpenTelemetry tracing
52
58
  ```
53
59
 
54
60
  Or with pip:
@@ -57,6 +63,9 @@ Or with pip:
57
63
  pip install "agentix-toolkit[anthropic]"
58
64
  ```
59
65
 
66
+ Extras are opt-in: `anthropic` (the model adapter), `mcp` (MCP client),
67
+ `otel` (OpenTelemetry tracing). The core has **no required dependencies**.
68
+
60
69
  ### 2. Run an agent with no API key
61
70
 
62
71
  `MockModel` is a scripted, dependency-free model — perfect for trying the loop
@@ -143,6 +152,30 @@ async for event in agent.stream("Tell me about Lisbon."):
143
152
  print("\n", event.outcome.status)
144
153
  ```
145
154
 
155
+ ### 6. Make it production-safe (validate output, fall back, cap cost)
156
+
157
+ Stop malformed output from crashing downstream code: validate the final answer
158
+ and re-prompt on failure. Add a fallback model and a USD budget for resilience.
159
+
160
+ ```python
161
+ from agentix import Agent, AgentPolicy, FallbackModel, json_output
162
+
163
+ agent = Agent(
164
+ model=FallbackModel([primary_model, backup_model]), # survive a provider blip
165
+ system_prompt="Reply with a JSON object.",
166
+ tools=[...],
167
+ output_validator=json_output, # or pydantic_output(MyModel)
168
+ max_output_retries=2, # re-prompt the model on bad output
169
+ policy=AgentPolicy(max_budget_usd=0.50), # abort if it gets expensive
170
+ )
171
+ outcome = await agent.run("...")
172
+ outcome.parsed # a validated object — safe to use; outcome.cost_usd is tracked
173
+ ```
174
+
175
+ Then **gate quality in CI** with the eval harness — `evaluate(...)` runs your
176
+ agent over golden cases and `assert_pass_rate(...)` fails the build on a
177
+ regression (see `examples/17_eval.py`).
178
+
146
179
  ---
147
180
 
148
181
  ## Feature tour
@@ -158,6 +191,13 @@ Each links to a runnable example in [`examples/`](./examples):
158
191
  | Concurrency | `Limiter` + `bounded_gather` for fleets | `10_concurrency.py` |
159
192
  | MCP | use any MCP server's tools | `11_mcp.py` |
160
193
  | Context | bound the transcript (`TrimRounds`, …) | `12_context.py` |
194
+ | Subagents | delegate a subtask to a child agent | `13_subagents.py` |
195
+ | Cost & interrupt | USD budgets + stop a run mid-flight | `14_cost_and_interrupt.py` |
196
+ | Permissions | dynamic `can_use_tool` + tool allowlist | `15_permissions.py` |
197
+ | Reliability | output validation + retry, fallback/retry models | `16_reliability.py` |
198
+ | Eval | score golden cases, gate CI on pass rate | `17_eval.py` |
199
+ | Verify | self-consistency + LLM-as-judge | `18_verification.py` |
200
+ | Tracing | OpenTelemetry model/tool/run spans | `19_tracing.py` |
161
201
 
162
202
  ---
163
203
 
@@ -176,6 +216,12 @@ Run an example: `uv run python examples/01_hello_agent.py`.
176
216
  See [`RELEASING.md`](./RELEASING.md) for the publish process and
177
217
  [`PLAN.md`](./PLAN.md) for the roadmap.
178
218
 
219
+ ## Contributing
220
+
221
+ Contributions are welcome! See [`CONTRIBUTING.md`](./CONTRIBUTING.md) for setup
222
+ and the PR checklist, [`CODE_OF_CONDUCT.md`](./CODE_OF_CONDUCT.md), and
223
+ [`SECURITY.md`](./SECURITY.md) for reporting vulnerabilities privately.
224
+
179
225
  ## License
180
226
 
181
227
  MIT — see [`LICENSE`](./LICENSE).