agentforge-py 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. agentforge/__init__.py +114 -0
  2. agentforge/_testing/__init__.py +19 -0
  3. agentforge/_testing/fake_llm.py +126 -0
  4. agentforge/_testing/fake_tool.py +122 -0
  5. agentforge/_tools/__init__.py +14 -0
  6. agentforge/_tools/calculator.py +102 -0
  7. agentforge/_tools/decorator.py +300 -0
  8. agentforge/_tools/file_read.py +112 -0
  9. agentforge/_tools/shell.py +134 -0
  10. agentforge/_tools/web_search.py +207 -0
  11. agentforge/agent.py +817 -0
  12. agentforge/auth.py +42 -0
  13. agentforge/cli/__init__.py +18 -0
  14. agentforge/cli/_build.py +323 -0
  15. agentforge/cli/_scaffold_state.py +250 -0
  16. agentforge/cli/_shared_scaffold.py +174 -0
  17. agentforge/cli/config_cmd.py +174 -0
  18. agentforge/cli/db_cmd.py +262 -0
  19. agentforge/cli/debug_cmd.py +168 -0
  20. agentforge/cli/docs_cmd.py +217 -0
  21. agentforge/cli/eval_cmd.py +181 -0
  22. agentforge/cli/health_cmd.py +139 -0
  23. agentforge/cli/list_modules.py +85 -0
  24. agentforge/cli/main.py +81 -0
  25. agentforge/cli/manifest_apply.py +368 -0
  26. agentforge/cli/module_cmd.py +247 -0
  27. agentforge/cli/new_cmd.py +171 -0
  28. agentforge/cli/run_cmd.py +234 -0
  29. agentforge/cli/upgrade_cmd.py +230 -0
  30. agentforge/config/__init__.py +45 -0
  31. agentforge/eval/__init__.py +18 -0
  32. agentforge/eval/consistency.py +107 -0
  33. agentforge/eval/coverage.py +100 -0
  34. agentforge/eval/format_compliance.py +107 -0
  35. agentforge/eval/regression.py +143 -0
  36. agentforge/findings.py +166 -0
  37. agentforge/guardrails/__init__.py +32 -0
  38. agentforge/guardrails/allowlist.py +49 -0
  39. agentforge/guardrails/capability_check.py +58 -0
  40. agentforge/guardrails/engine.py +289 -0
  41. agentforge/guardrails/pii_redact_basic.py +61 -0
  42. agentforge/guardrails/prompt_injection_basic.py +90 -0
  43. agentforge/memory/__init__.py +16 -0
  44. agentforge/memory/in_memory.py +130 -0
  45. agentforge/memory/in_memory_graph.py +262 -0
  46. agentforge/memory/in_memory_vector.py +167 -0
  47. agentforge/pipeline/__init__.py +26 -0
  48. agentforge/pipeline/engine.py +189 -0
  49. agentforge/pipeline/errors.py +19 -0
  50. agentforge/pipeline/tool.py +93 -0
  51. agentforge/py.typed +0 -0
  52. agentforge/recording.py +189 -0
  53. agentforge/renderers/__init__.py +28 -0
  54. agentforge/renderers/_defaults.py +32 -0
  55. agentforge/renderers/markdown.py +44 -0
  56. agentforge/renderers/patch_applier.py +46 -0
  57. agentforge/renderers/registry.py +108 -0
  58. agentforge/renderers/scorecard.py +59 -0
  59. agentforge/renderers/span_table.py +71 -0
  60. agentforge/replay.py +260 -0
  61. agentforge/resolver_register.py +41 -0
  62. agentforge/retrieval.py +410 -0
  63. agentforge/runtime.py +63 -0
  64. agentforge/strategies/__init__.py +27 -0
  65. agentforge/strategies/_base.py +280 -0
  66. agentforge/strategies/_plan.py +93 -0
  67. agentforge/strategies/multi_agent.py +541 -0
  68. agentforge/strategies/plan_execute.py +506 -0
  69. agentforge/strategies/react.py +237 -0
  70. agentforge/strategies/tot.py +472 -0
  71. agentforge/templates/_shared/.cursorrules +12 -0
  72. agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
  73. agentforge/templates/_shared/.gitkeep +0 -0
  74. agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
  75. agentforge/templates/_shared/CLAUDE.md +13 -0
  76. agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
  77. agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
  78. agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
  79. agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
  80. agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
  81. agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
  82. agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
  83. agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
  84. agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
  85. agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
  86. agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
  87. agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
  88. agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
  89. agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
  90. agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
  91. agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
  92. agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
  93. agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
  94. agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
  95. agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
  96. agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
  97. agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
  98. agentforge/templates/code-reviewer/.env.example +8 -0
  99. agentforge/templates/code-reviewer/.gitignore +7 -0
  100. agentforge/templates/code-reviewer/README.md +12 -0
  101. agentforge/templates/code-reviewer/agentforge.yaml +23 -0
  102. agentforge/templates/code-reviewer/copier.yml +34 -0
  103. agentforge/templates/code-reviewer/pyproject.toml +18 -0
  104. agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  105. agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  106. agentforge/templates/docs-qa/.env.example +8 -0
  107. agentforge/templates/docs-qa/.gitignore +7 -0
  108. agentforge/templates/docs-qa/README.md +14 -0
  109. agentforge/templates/docs-qa/agentforge.yaml +19 -0
  110. agentforge/templates/docs-qa/copier.yml +31 -0
  111. agentforge/templates/docs-qa/pyproject.toml +18 -0
  112. agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  113. agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  114. agentforge/templates/minimal/.env.example +11 -0
  115. agentforge/templates/minimal/.gitignore +10 -0
  116. agentforge/templates/minimal/README.md +28 -0
  117. agentforge/templates/minimal/agentforge.yaml +10 -0
  118. agentforge/templates/minimal/copier.yml +52 -0
  119. agentforge/templates/minimal/pyproject.toml +18 -0
  120. agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  121. agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
  122. agentforge/templates/patch-bot/.env.example +8 -0
  123. agentforge/templates/patch-bot/.gitignore +7 -0
  124. agentforge/templates/patch-bot/README.md +13 -0
  125. agentforge/templates/patch-bot/agentforge.yaml +15 -0
  126. agentforge/templates/patch-bot/copier.yml +31 -0
  127. agentforge/templates/patch-bot/pyproject.toml +18 -0
  128. agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  129. agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  130. agentforge/templates/research/.env.example +8 -0
  131. agentforge/templates/research/.gitignore +7 -0
  132. agentforge/templates/research/README.md +14 -0
  133. agentforge/templates/research/agentforge.yaml +17 -0
  134. agentforge/templates/research/copier.yml +31 -0
  135. agentforge/templates/research/pyproject.toml +18 -0
  136. agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  137. agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
  138. agentforge/templates/triage/.env.example +8 -0
  139. agentforge/templates/triage/.gitignore +7 -0
  140. agentforge/templates/triage/README.md +14 -0
  141. agentforge/templates/triage/agentforge.yaml +25 -0
  142. agentforge/templates/triage/copier.yml +31 -0
  143. agentforge/templates/triage/pyproject.toml +18 -0
  144. agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  145. agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
  146. agentforge/testing/__init__.py +69 -0
  147. agentforge/testing/conformance.py +40 -0
  148. agentforge/testing/factory.py +89 -0
  149. agentforge/testing/fixtures.py +42 -0
  150. agentforge/testing/llm.py +235 -0
  151. agentforge/testing/recording.py +177 -0
  152. agentforge/tools/__init__.py +41 -0
  153. agentforge_py-0.2.1.dist-info/METADATA +158 -0
  154. agentforge_py-0.2.1.dist-info/RECORD +157 -0
  155. agentforge_py-0.2.1.dist-info/WHEEL +4 -0
  156. agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
  157. agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,76 @@
1
+ # 10 — Add evaluators
2
+
3
+ > **Goal:** score each agent run on quality so regressions are
4
+ > caught before they ship.
5
+ > **Time:** ~20 minutes.
6
+ > **Prereqs:** runbook 06.
7
+
8
+ ## TL;DR
9
+
10
+ ```yaml
11
+ # agentforge.yaml
12
+ modules:
13
+ evaluators:
14
+ - name: faithfulness # LLM-judge
15
+ - name: coverage # deterministic
16
+ config:
17
+ required_facts: ["population", "year"]
18
+ - name: regression-vs-baseline
19
+ config:
20
+ baseline_path: ./tests/baselines/answers.jsonl
21
+ ```
22
+
23
+ ```bash
24
+ agentforge eval --fixtures ./tests/golden.jsonl --threshold 0.8
25
+ ```
26
+
27
+ ## Step by step
28
+
29
+ 1. **Mix deterministic + LLM-judge.** Deterministic graders
30
+ (coverage, format-compliance, regression-vs-baseline,
31
+ consistency) are cheap; ship them everywhere. Use LLM-judge
32
+ graders (faithfulness, groundedness, hallucination,
33
+ relevance, helpfulness, correctness) when no rule captures
34
+ the property — they cost LLM calls per evaluation.
35
+ 2. **Declare under `modules.evaluators`.** Each entry has a
36
+ `name` (resolver key) and optional `config`. The framework
37
+ instantiates and runs them post-run, attaching scores to
38
+ `RunResult.eval_scores`.
39
+ 3. **Wire into CI.** `agentforge eval --fixtures golden.jsonl
40
+ --threshold 0.8 --output-format junit > eval.xml` exits 5
41
+ when the mean score is below the threshold.
42
+ 4. **Threshold per evaluator** (when one matters more than the
43
+ others) goes in the evaluator's own `config` block.
44
+ 5. **Custom evaluators** subclass `Evaluator` and register with
45
+ `@register("evaluators", "my-name")`. Run
46
+ `run_evaluator_conformance(my_eval)` to verify the contract.
47
+
48
+ ## Variations
49
+
50
+ - **Cost gating** — each LLM-judge declares
51
+ `cost_estimate_usd`. `BudgetPolicy` skips them when the run's
52
+ remaining budget would be exceeded.
53
+ - **GEval rubrics** — `agentforge-eval-geval` lets you define
54
+ arbitrary judge rubrics in YAML.
55
+ - **Snapshot diff** — for outputs that should stay byte-stable,
56
+ pair an evaluator with `agentforge_testing.assert_snapshot`.
57
+
58
+ ## Troubleshooting
59
+
60
+ | Symptom | Cause | Fix |
61
+ |---|---|---|
62
+ | `No module registered for evaluators:faithfulness` | LLM-judge pkg missing | `agentforge add module eval-geval` |
63
+ | Evaluators didn't run | budget exhausted before eval pass | bump `agent.budget.usd` or drop expensive judges |
64
+ | Threshold passes but quality regressed | mean masked outliers | switch CI to per-fixture threshold or run with `--threshold-per-evaluator` |
65
+ | Judge gives same score every time | judge prompt too vague | tighten the rubric; add 2-3 worked examples |
66
+
67
+ ## Related
68
+
69
+ - Runbook 06 — Test your agent
70
+ - Runbook 12 — Add observability (eval scores feed dashboards)
71
+ - Feature spec: `docs/features/feat-006-evaluators-and-benchmarks.md`
72
+
73
+ <!-- agentforge:end-managed -->
74
+
75
+ <!-- agentforge:custom -->
76
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,83 @@
1
+ # 11 — Add safety guardrails
2
+
3
+ > **Goal:** layer input validation, output redaction, and
4
+ > tool-call gating onto your agent.
5
+ > **Time:** ~15 minutes.
6
+ > **Prereqs:** runbook 02.
7
+
8
+ ## TL;DR
9
+
10
+ ```yaml
11
+ # agentforge.yaml
12
+ modules:
13
+ guardrails:
14
+ defaults: true # framework basics auto-installed
15
+ input:
16
+ - prompt_injection_basic
17
+ output:
18
+ - pii_redact_basic
19
+ tool_gates:
20
+ - capability_check
21
+ - allowlist:
22
+ allowed: ["web_search", "calculator"]
23
+ guardrail_policy:
24
+ on_input_violation: block
25
+ on_output_violation: redact
26
+ on_tool_violation: block
27
+ fail_open: false
28
+ ```
29
+
30
+ ## Step by step
31
+
32
+ 1. **Start with the basics.** `prompt_injection_basic` +
33
+ `pii_redact_basic` + `capability_check` cover the obvious
34
+ cases out of the box; they ship with the framework.
35
+ 2. **Add an allowlist** if your tools include anything
36
+ `destructive`. `capability_check` already denies destructive
37
+ tools by default; `allowlist` is a tighter second layer.
38
+ 3. **Pick a policy.** `block` is the safe default for input and
39
+ tool violations; `redact` for outputs lets the run complete
40
+ with PII stripped. `fail_open: false` (the default) treats
41
+ validator exceptions as failures.
42
+ 4. **Add vendor modules** when basics aren't enough. `presidio`
43
+ for richer PII, `llmguard` for richer prompt-injection,
44
+ `nemo` for programmable Colang rails, `llamaguard` for the
45
+ Llama Guard 3 classifier. Each is a separate pip install.
46
+ 5. **Audit decisions.** Every validator call emits an
47
+ `agentforge.audit` log record and appends to
48
+ `RunResult.guardrail_events`. Configure your log pipeline to
49
+ stream the audit logger to a security store.
50
+
51
+ ## Variations
52
+
53
+ - **Custom validator.** Subclass `InputValidator` /
54
+ `OutputValidator` / `ToolCallGate` from
55
+ `agentforge_core.contracts.guardrails`, register with
56
+ `@register("guardrails.input", "my-name")`.
57
+ - **Score-only mode** — Presidio + LLM Guard support a
58
+ `score-only` action that reports without modifying content.
59
+ Useful for triage dashboards.
60
+ - **Conformance test custom validators** with
61
+ `run_input_validator_conformance` /
62
+ `run_output_validator_conformance` / `run_tool_gate_conformance` from
63
+ `agentforge.testing`.
64
+
65
+ ## Troubleshooting
66
+
67
+ | Symptom | Cause | Fix |
68
+ |---|---|---|
69
+ | `GuardrailViolation` at startup | input flagged | inspect `RunResult.guardrail_events`; relax to `warn` if false-positive |
70
+ | PII still in output | regex basic doesn't catch your case | install `agentforge-guard-presidio` for richer detection |
71
+ | Destructive tool still ran | `capability_check` was disabled in config | re-enable; ensure `Tool.capabilities` includes `"destructive"` |
72
+ | Tests fail with `GuardrailViolation` | tests use prompts that look like injection | mock the validator in tests, or rephrase the test prompt |
73
+
74
+ ## Related
75
+
76
+ - Runbook 12 — Add observability (audit stream)
77
+ - Runbook 14 — Deploy your agent (policy hardening)
78
+ - Feature spec: `docs/features/feat-018-safety-and-security-guardrails.md`
79
+
80
+ <!-- agentforge:end-managed -->
81
+
82
+ <!-- agentforge:custom -->
83
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,77 @@
1
+ # 12 — Add observability
2
+
3
+ > **Goal:** stream structured logs + distributed traces from
4
+ > every agent run to your APM stack.
5
+ > **Time:** ~15 minutes.
6
+ > **Prereqs:** runbook 01.
7
+
8
+ ## TL;DR
9
+
10
+ ```yaml
11
+ # agentforge.yaml
12
+ logging:
13
+ format: json
14
+ run_id_filter: true
15
+ modules:
16
+ observability:
17
+ - name: otel
18
+ config:
19
+ endpoint: "${OTEL_EXPORTER_OTLP_ENDPOINT}"
20
+ service_name: "{{ project_slug }}"
21
+ ```
22
+
23
+ ```bash
24
+ agentforge add module otel
25
+ ```
26
+
27
+ ## Step by step
28
+
29
+ 1. **Turn on JSON logging.** `logging.format: json` swaps the
30
+ default text formatter for `JSONFormatter`; every log line
31
+ becomes one JSON object suitable for piping into a log
32
+ aggregator.
33
+ 2. **Enable run_id propagation.** `run_id_filter: true`
34
+ installs a logging filter that attaches the active run's
35
+ `run_id` to every record under that run's context. Use it to
36
+ cross-reference runs across components.
37
+ 3. **Install OTel.** `agentforge add module otel` adds
38
+ `agentforge-otel`; the framework's root span (`agent.run`)
39
+ then becomes the parent of every strategy / LLM / tool span.
40
+ 4. **Point at your collector.** OTLP/gRPC by default; set
41
+ `OTEL_EXPORTER_OTLP_ENDPOINT` (or hard-code in the YAML).
42
+ Service name = project slug by default.
43
+ 5. **Custom hooks.** Implement `on_step(step)` /
44
+ `on_finish(result)` callables and pass them to
45
+ `Agent(on_step=..., on_finish=...)` for bespoke metrics;
46
+ multiple hooks fan out in parallel.
47
+
48
+ ## Variations
49
+
50
+ - **Custom log channels.** Audit decisions go to
51
+ `agentforge.audit`; route them to a security store separately
52
+ from app logs.
53
+ - **Vendor backends** — Langfuse / Phoenix / Evidently / StatsD
54
+ modules each wrap their own SDK behind the same hook
55
+ contract. Add via `agentforge add module <name>`.
56
+ - **Cost dashboards.** `RunResult.cost_usd` + `eval_scores` are
57
+ cheap series for daily cost-vs-quality charts.
58
+
59
+ ## Troubleshooting
60
+
61
+ | Symptom | Cause | Fix |
62
+ |---|---|---|
63
+ | No spans in OTel UI | exporter endpoint wrong | check `agentforge config show --resolved` then curl the OTLP endpoint |
64
+ | Run id missing from logs | run_id_filter disabled | re-enable in YAML; restart the process |
65
+ | Hook breaks the run | exceptions in hooks default to log-and-continue | check the hook's error log; framework isolates failures |
66
+ | Spans missing inside strategies | older `agentforge-otel`; iteration spans land in 0.2+ | upgrade the module |
67
+
68
+ ## Related
69
+
70
+ - Runbook 11 — Add safety guardrails (audit stream)
71
+ - Runbook 14 — Deploy your agent
72
+ - Feature spec: `docs/features/feat-009-observability.md`
73
+
74
+ <!-- agentforge:end-managed -->
75
+
76
+ <!-- agentforge:custom -->
77
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,91 @@
1
+ # 13 — Configure multi-provider
2
+
3
+ > **Goal:** run different model classes for reasoning, judging,
4
+ > and embedding without rewriting your agent.
5
+ > **Time:** ~10 minutes.
6
+ > **Prereqs:** runbook 01.
7
+
8
+ ## TL;DR
9
+
10
+ ```yaml
11
+ # agentforge.yaml
12
+ providers:
13
+ default:
14
+ type: anthropic # native Anthropic API
15
+ model: claude-sonnet-4-7
16
+ judge:
17
+ type: anthropic
18
+ model: claude-haiku-4-5 # cheaper judge
19
+ embed:
20
+ type: voyage
21
+ model: voyage-3-large
22
+ agent:
23
+ model: anthropic:claude-sonnet-4-7
24
+ modules:
25
+ evaluators:
26
+ - name: faithfulness
27
+ config:
28
+ judge_provider: judge
29
+ ```
30
+
31
+ **Available provider drivers (v0.2):**
32
+
33
+ | `type:` | Package | Capabilities |
34
+ |---|---|---|
35
+ | `bedrock` | `agentforge-bedrock` | tools, json_mode, caching, thinking, streaming |
36
+ | `anthropic` | `agentforge-anthropic` | tools, json_mode, caching, thinking, streaming |
37
+ | `openai` | `agentforge-openai` | tools, json_mode, streaming, vision (gpt-4o*) |
38
+ | `ollama` | `agentforge-ollama` | tools, streaming (local; zero cost) |
39
+ | `litellm` | `agentforge-litellm` | tools (router → 100+ backends) |
40
+ | `voyage` | `agentforge-voyage` | embedding-only; matryoshka |
41
+
42
+ ## Step by step
43
+
44
+ 1. **Name your providers** under the top-level `providers:` map.
45
+ `default` is the one `agent.model` falls back to; named
46
+ entries (`judge`, `embed`, `summariser`) can be addressed by
47
+ downstream modules.
48
+ 2. **Pick the reasoning model.** `agent.model` is the agent's
49
+ primary LLM. Use the strongest model you can afford.
50
+ 3. **Use a cheaper judge** for LLM-judge evaluators. Per
51
+ feat-006, judge graders take a `judge_provider` config that
52
+ resolves the named provider. Cheap haiku-class models bring
53
+ judge cost down 10x with marginal quality loss for boolean
54
+ evaluations.
55
+ 4. **Separate embedding from reasoning.** Vector indexing
56
+ typically benefits from a dedicated embedder
57
+ (`voyage-3`, `text-embedding-3-large`). Wire it into
58
+ `modules.retriever.embedding_provider`.
59
+ 5. **Per-module overrides.** Any module that takes an LLM
60
+ (guardrails / evaluators / etc.) can name a provider.
61
+
62
+ ## Variations
63
+
64
+ - **Fallback chain.** Use `agentforge_core.production.FallbackChain`
65
+ to wrap two providers; primary first, secondary on
66
+ `RateLimitError` / `ServiceError`.
67
+ - **Different providers per environment.** `agentforge.dev.yaml`
68
+ overlay points at a cheap dev model; `agentforge.prod.yaml`
69
+ swaps to the production tier. `AGENTFORGE_ENV=prod` selects.
70
+ - **Mock provider for tests.** Register `MockLLMClient` as a
71
+ named provider so config-driven tests reuse it.
72
+
73
+ ## Troubleshooting
74
+
75
+ | Symptom | Cause | Fix |
76
+ |---|---|---|
77
+ | `No LLM provider registered for X` | provider package not installed | `agentforge add module <X>` |
78
+ | Judge cost > reasoning cost | judge running on the same big model | name a cheaper judge provider |
79
+ | Embedder shape mismatch | mixed-dimension stores | pin embedding model + dimension in the vector store config |
80
+ | Runs intermittently fail with 5xx | provider outage | wrap with `FallbackChain` |
81
+
82
+ ## Related
83
+
84
+ - Runbook 10 — Add evaluators (judge_provider)
85
+ - Runbook 14 — Deploy your agent (environment overlays)
86
+ - Feature spec: `docs/features/feat-003-llm-provider-abstraction.md`
87
+
88
+ <!-- agentforge:end-managed -->
89
+
90
+ <!-- agentforge:custom -->
91
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,70 @@
1
+ # 14 — Deploy your agent
2
+
3
+ > **Goal:** get the agent running somewhere durable (container,
4
+ > serverless, batch job) with proper secrets and observability.
5
+ > **Time:** ~30 minutes.
6
+ > **Prereqs:** runbooks 01, 08, 12.
7
+
8
+ ## TL;DR
9
+
10
+ ```dockerfile
11
+ FROM python:3.13-slim
12
+ WORKDIR /app
13
+ RUN pip install --no-cache-dir uv
14
+ COPY pyproject.toml uv.lock ./
15
+ COPY src/ ./src/
16
+ COPY agentforge.yaml ./
17
+ RUN uv sync --frozen
18
+ ENV AGENTFORGE_ENV=prod
19
+ CMD ["uv", "run", "agentforge", "run", "--task-file", "/in/task.txt", "--output-format", "json"]
20
+ ```
21
+
22
+ ## Step by step
23
+
24
+ 1. **Pin every dependency.** `uv.lock` must ship with the
25
+ image. `uv sync --frozen` enforces that.
26
+ 2. **Use environment overlays.** Ship `agentforge.yaml` +
27
+ `agentforge.prod.yaml`; set `AGENTFORGE_ENV=prod` in the
28
+ container. The framework merges the overlay automatically.
29
+ 3. **Mount secrets via env.** `${AWS_ACCESS_KEY_ID}` etc. in
30
+ the YAML resolve from the container's env. Never bake
31
+ secrets into the image.
32
+ 4. **Provision the memory store.** If using Postgres, run
33
+ `agentforge db migrate` as a pre-deploy step (helm hook,
34
+ k8s Job, deployment script).
35
+ 5. **Configure observability** — export
36
+ `OTEL_EXPORTER_OTLP_ENDPOINT`, `OTEL_RESOURCE_ATTRIBUTES=service.name=...`.
37
+ 6. **Health probe.** `agentforge health --output-format json`
38
+ exits 0 when config + modules + backends are all OK; perfect
39
+ for k8s readiness probes.
40
+
41
+ ## Variations
42
+
43
+ - **Serverless.** Same image, different entrypoint. Lambda /
44
+ Cloud Run trigger calls `agentforge run` with the task from
45
+ the event.
46
+ - **Batch worker.** Loop over a queue; reuse the Agent across
47
+ tasks. `Agent` is thread-safe; each `run` creates fresh
48
+ per-run state.
49
+ - **Multi-tenant.** One Agent per tenant; route requests by
50
+ `project` / `agent` claim namespace.
51
+
52
+ ## Troubleshooting
53
+
54
+ | Symptom | Cause | Fix |
55
+ |---|---|---|
56
+ | Container exits 2 on start | config invalid in the prod overlay | check `agentforge config validate --env prod` locally |
57
+ | `connection refused` on DB | network policy blocking | mount the secret AND open egress |
58
+ | OTel spans not appearing | service.name not set | export `OTEL_RESOURCE_ATTRIBUTES=service.name=<your-agent>` |
59
+ | Probe fails intermittently | cold-start LLM auth | bump probe initial delay; cache provider client across requests |
60
+
61
+ ## Related
62
+
63
+ - Runbook 08 — Add memory (DSN secrets, migration)
64
+ - Runbook 12 — Add observability
65
+ - Runbook 15 — Upgrade your agent (release process)
66
+
67
+ <!-- agentforge:end-managed -->
68
+
69
+ <!-- agentforge:custom -->
70
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,67 @@
1
+ # 15 — Upgrade your agent
2
+
3
+ > **Goal:** pull the latest framework changes into this project
4
+ > without losing your customisations.
5
+ > **Time:** ~15 minutes.
6
+ > **Prereqs:** runbook 01.
7
+
8
+ ## TL;DR
9
+
10
+ ```bash
11
+ agentforge upgrade --dry-run # preview
12
+ agentforge upgrade # apply
13
+ agentforge status # any drift?
14
+ pytest -q
15
+ ```
16
+
17
+ ## Step by step
18
+
19
+ 1. **Read the framework's CHANGELOG.** Open
20
+ `docs/features/README.md` from the framework repo (or the
21
+ release notes) and skim what shipped between your version
22
+ and current.
23
+ 2. **Stage clean.** Commit any uncommitted work first.
24
+ `agentforge upgrade` is a three-way merge — easier to
25
+ resolve from a clean tree.
26
+ 3. **Dry-run.** `agentforge upgrade --dry-run` prints the diff
27
+ without writing. Use to scope the review.
28
+ 4. **Apply.** `agentforge upgrade` runs Copier's `run_update`,
29
+ merging managed files against the recorded template
30
+ version. Custom sections of three-section docs are
31
+ preserved automatically; non-managed code is left alone.
32
+ 5. **Resolve conflicts.** Copier surfaces conflicts in `.rej`
33
+ files. Edit by hand or `agentforge fork <path>` to claim
34
+ the file outright (future upgrades skip it).
35
+ 6. **Verify.** `agentforge status` should show no `DRIFTED`
36
+ files; `pytest -q` should pass.
37
+
38
+ ## Variations
39
+
40
+ - **Fork a file.** `agentforge fork src/myagent/agent_runtime.py`
41
+ strips the marker and flips the lock entry to `forked: true`.
42
+ Future upgrades skip it.
43
+ - **Unfork.** `agentforge unfork <path>` re-prepends the marker;
44
+ next upgrade re-pulls framework content (lossy).
45
+ - **Pin a target ref.** `agentforge upgrade --to <ref>` points
46
+ at a specific template ref instead of the latest. Useful for
47
+ staged rollouts.
48
+
49
+ ## Troubleshooting
50
+
51
+ | Symptom | Cause | Fix |
52
+ |---|---|---|
53
+ | `No .agentforge-state/answers.yml` | this directory wasn't scaffolded by `agentforge new` | upgrade only works on scaffolded projects |
54
+ | `.rej` conflict file | three-way merge couldn't auto-resolve | edit by hand; the `.rej` carries the framework's preferred shape |
55
+ | Custom section in runbook overwritten | edit went above the `<!-- agentforge:end-managed -->` marker | move custom content below the marker, restore from git |
56
+ | DB schema out of date | driver bumped its schema | `agentforge db backup` → `agentforge db migrate` → `agentforge db restore` |
57
+
58
+ ## Related
59
+
60
+ - Runbook 08 — Add memory (db migrate during upgrade)
61
+ - Runbook 14 — Deploy your agent (release process)
62
+ - Feature spec: `docs/features/feat-011-scaffolding-and-upgrade.md`
63
+
64
+ <!-- agentforge:end-managed -->
65
+
66
+ <!-- agentforge:custom -->
67
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,81 @@
1
+ # 16 — Configuration reference
2
+
3
+ > **Goal:** find the canonical shape of every `agentforge.yaml`
4
+ > field without re-reading source.
5
+ > **Time:** ~5 minutes (lookup).
6
+ > **Prereqs:** none.
7
+
8
+ ## TL;DR
9
+
10
+ ```bash
11
+ agentforge config schema | less # print the full JSON schema
12
+ agentforge config show --resolved # see what your YAML actually parsed to
13
+ agentforge config validate # fast-fail on bad keys
14
+ ```
15
+
16
+ ## Step by step
17
+
18
+ 1. **Schema is the truth.** `agentforge config schema` prints
19
+ the Pydantic-derived JSON schema for `AgentForgeConfig`. No
20
+ guessing.
21
+ 2. **Resolved view.** `agentforge config show --resolved` prints
22
+ the parsed config with `${ENV_VAR}` interpolation expanded,
23
+ env overlay merged, and CLI overrides applied. This is the
24
+ source of truth for "what will the agent actually run with?"
25
+ 3. **Validate** before commit. `agentforge config validate` is
26
+ the same parse the runtime does; exit code 2 means the YAML
27
+ has unknown keys, bad types, or invalid env references.
28
+
29
+ ## Top-level sections
30
+
31
+ | Section | Purpose |
32
+ |---|---|
33
+ | `agent` | name, model, strategy, system prompt, tools, budget, max_iterations, llm_options |
34
+ | `modules` | memory / graph / retriever / evaluators / observability / tools / protocols / guardrails |
35
+ | `providers` | named LLM clients (default + judge + embed + custom) |
36
+ | `logging` | level, run_id_filter, format (text\|json) |
37
+ | `output` | finding variant defaults, renderer choice, thresholds |
38
+ | `guardrail_policy` | on_input / on_output / on_tool violation actions, audit_channel, fail_open |
39
+
40
+ ## Environment + override order
41
+
42
+ CLI flags > `--override` flags > `agentforge.<env>.yaml` overlay >
43
+ `agentforge.yaml` > defaults.
44
+
45
+ ```bash
46
+ agentforge run \
47
+ --env prod \
48
+ --override agent.budget.usd=20 \
49
+ --override providers.default.model=claude-haiku-4-5 \
50
+ "your task"
51
+ ```
52
+
53
+ ## Variations
54
+
55
+ - **Schema export** — `agentforge config schema > schema.json`
56
+ feeds IDE YAML LSPs (vscode-yaml etc.) for autocomplete.
57
+ - **Per-module schemas** — installed modules contribute schemas
58
+ to `modules.<section>.config`. `agentforge config validate
59
+ --strict` enforces.
60
+ - **`AGENTFORGE_CONFIG`** + `AGENTFORGE_ENV` +
61
+ `AGENTFORGE_LOG_LEVEL` env vars are the three shortcuts that
62
+ don't require flags.
63
+
64
+ ## Troubleshooting
65
+
66
+ | Symptom | Cause | Fix |
67
+ |---|---|---|
68
+ | `unknown field` on a key you expected to be valid | typo or post-major rename | check the schema; spec changes are listed in CHANGELOG |
69
+ | `${VAR}` not resolving | env var unset | `agentforge config show --resolved` reports the missing one |
70
+ | Override not taking effect | wrong dotted path | overrides are dotted: `agent.budget.usd=10`, not `budget.usd` |
71
+ | `fail_open: true` slipped into prod | dev overlay leaked | rotate env-overlay names; ship only the prod overlay to prod |
72
+
73
+ ## Related
74
+
75
+ - Every other runbook (they all link back here)
76
+ - Feature spec: `docs/features/feat-012-configuration-system.md`
77
+
78
+ <!-- agentforge:end-managed -->
79
+
80
+ <!-- agentforge:custom -->
81
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,78 @@
1
+ # 17 — Add a reranker
2
+
3
+ > **Goal:** improve retrieval precision by re-scoring the top-k
4
+ > candidates a vector store returned, then keeping the best.
5
+ > **Time:** ~10 minutes.
6
+ > **Prereqs:** runbook 08 (retrieval already wired).
7
+
8
+ ## TL;DR
9
+
10
+ ```yaml
11
+ # agentforge.yaml
12
+ retrieval:
13
+ embedder:
14
+ driver: voyage
15
+ config: {model: voyage-3-large}
16
+ vector_store:
17
+ driver: postgres
18
+ config: {dsn: $POSTGRES_DSN, table: docs}
19
+ reranker:
20
+ name: cohere # or: sentence_transformers / voyage / mixedbread
21
+ config:
22
+ api_key: $COHERE_API_KEY
23
+ model: rerank-english-v3.0
24
+ top_k: 4 # keep the top 4 after re-scoring
25
+ ```
26
+
27
+ ## Step by step
28
+
29
+ 1. **Pick a reranker driver.** Built-in choices:
30
+ - `sentence_transformers` — local cross-encoder; no API key, slower.
31
+ - `cohere` — managed; fast; needs `COHERE_API_KEY`.
32
+ - `voyage` — managed; high quality; needs `VOYAGE_API_KEY`.
33
+ - `mixedbread` — managed; needs `MIXEDBREAD_API_KEY`.
34
+ 2. **Install the matching package.**
35
+ `agentforge add module reranker-cohere` (or `-voyage`,
36
+ `-mixedbread`, `-sentence-transformers`).
37
+ 3. **Drop the `reranker:` block** into `retrieval:`. The
38
+ `Retriever` looks up the driver via the `agentforge.rerankers`
39
+ entry-point category and slots it after the vector / hybrid
40
+ search stage.
41
+ 4. **Set `top_k`.** The reranker runs over the vector store's
42
+ `top_k_pre` candidates and returns `top_k`. Common settings:
43
+ `top_k_pre=20, top_k=4` for cost-aware, `top_k_pre=50,
44
+ top_k=8` for quality-aware.
45
+ 5. **Test it.** `await retriever.retrieve("query")` returns
46
+ `VectorMatch` rows already in reranked order — the
47
+ `score` field reflects the reranker's score, not the
48
+ original vector similarity.
49
+
50
+ ## Variations
51
+
52
+ - **Two-stage** — keep an embedding-based fast path with a
53
+ reranker only on cold queries. Set
54
+ `retrieval.reranker.always: false`.
55
+ - **Custom reranker** — implement the `Reranker` ABC in
56
+ `agentforge_core.contracts.reranker` and register it via the
57
+ `agentforge.rerankers` entry-point in your module's
58
+ `pyproject.toml`.
59
+
60
+ ## Troubleshooting
61
+
62
+ | Symptom | Cause | Fix |
63
+ |---|---|---|
64
+ | `No reranker registered for X` | package not installed | `agentforge add module reranker-X` |
65
+ | Latency 2-3x higher | local cross-encoder on CPU | switch to managed (Cohere / Voyage) |
66
+ | Top result is wrong | reranker model mismatch with corpus language | pick the matching `rerank-multilingual-v3.0` or similar |
67
+ | Cost spike | reranker called per request, hot path | cache by query hash or move reranker to async batch path |
68
+
69
+ ## Related
70
+
71
+ - Runbook 08 — Add memory + retrieval
72
+ - Runbook 18 — Add hybrid search
73
+ - Feature spec: `docs/features/feat-021-reranker.md`
74
+
75
+ <!-- agentforge:end-managed -->
76
+
77
+ <!-- agentforge:custom -->
78
+ <!-- agentforge:end-custom -->