agentforge-py 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge/__init__.py +114 -0
- agentforge/_testing/__init__.py +19 -0
- agentforge/_testing/fake_llm.py +126 -0
- agentforge/_testing/fake_tool.py +122 -0
- agentforge/_tools/__init__.py +14 -0
- agentforge/_tools/calculator.py +102 -0
- agentforge/_tools/decorator.py +300 -0
- agentforge/_tools/file_read.py +112 -0
- agentforge/_tools/shell.py +134 -0
- agentforge/_tools/web_search.py +207 -0
- agentforge/agent.py +817 -0
- agentforge/auth.py +42 -0
- agentforge/cli/__init__.py +18 -0
- agentforge/cli/_build.py +323 -0
- agentforge/cli/_scaffold_state.py +250 -0
- agentforge/cli/_shared_scaffold.py +174 -0
- agentforge/cli/config_cmd.py +174 -0
- agentforge/cli/db_cmd.py +262 -0
- agentforge/cli/debug_cmd.py +168 -0
- agentforge/cli/docs_cmd.py +217 -0
- agentforge/cli/eval_cmd.py +181 -0
- agentforge/cli/health_cmd.py +139 -0
- agentforge/cli/list_modules.py +85 -0
- agentforge/cli/main.py +81 -0
- agentforge/cli/manifest_apply.py +368 -0
- agentforge/cli/module_cmd.py +247 -0
- agentforge/cli/new_cmd.py +171 -0
- agentforge/cli/run_cmd.py +234 -0
- agentforge/cli/upgrade_cmd.py +230 -0
- agentforge/config/__init__.py +45 -0
- agentforge/eval/__init__.py +18 -0
- agentforge/eval/consistency.py +107 -0
- agentforge/eval/coverage.py +100 -0
- agentforge/eval/format_compliance.py +107 -0
- agentforge/eval/regression.py +143 -0
- agentforge/findings.py +166 -0
- agentforge/guardrails/__init__.py +32 -0
- agentforge/guardrails/allowlist.py +49 -0
- agentforge/guardrails/capability_check.py +58 -0
- agentforge/guardrails/engine.py +289 -0
- agentforge/guardrails/pii_redact_basic.py +61 -0
- agentforge/guardrails/prompt_injection_basic.py +90 -0
- agentforge/memory/__init__.py +16 -0
- agentforge/memory/in_memory.py +130 -0
- agentforge/memory/in_memory_graph.py +262 -0
- agentforge/memory/in_memory_vector.py +167 -0
- agentforge/pipeline/__init__.py +26 -0
- agentforge/pipeline/engine.py +189 -0
- agentforge/pipeline/errors.py +19 -0
- agentforge/pipeline/tool.py +93 -0
- agentforge/py.typed +0 -0
- agentforge/recording.py +189 -0
- agentforge/renderers/__init__.py +28 -0
- agentforge/renderers/_defaults.py +32 -0
- agentforge/renderers/markdown.py +44 -0
- agentforge/renderers/patch_applier.py +46 -0
- agentforge/renderers/registry.py +108 -0
- agentforge/renderers/scorecard.py +59 -0
- agentforge/renderers/span_table.py +71 -0
- agentforge/replay.py +260 -0
- agentforge/resolver_register.py +41 -0
- agentforge/retrieval.py +410 -0
- agentforge/runtime.py +63 -0
- agentforge/strategies/__init__.py +27 -0
- agentforge/strategies/_base.py +280 -0
- agentforge/strategies/_plan.py +93 -0
- agentforge/strategies/multi_agent.py +541 -0
- agentforge/strategies/plan_execute.py +506 -0
- agentforge/strategies/react.py +237 -0
- agentforge/strategies/tot.py +472 -0
- agentforge/templates/_shared/.cursorrules +12 -0
- agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
- agentforge/templates/_shared/.gitkeep +0 -0
- agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
- agentforge/templates/_shared/CLAUDE.md +13 -0
- agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
- agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
- agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
- agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
- agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
- agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
- agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
- agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
- agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
- agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
- agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
- agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
- agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
- agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
- agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
- agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
- agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
- agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
- agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
- agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
- agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
- agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
- agentforge/templates/code-reviewer/.env.example +8 -0
- agentforge/templates/code-reviewer/.gitignore +7 -0
- agentforge/templates/code-reviewer/README.md +12 -0
- agentforge/templates/code-reviewer/agentforge.yaml +23 -0
- agentforge/templates/code-reviewer/copier.yml +34 -0
- agentforge/templates/code-reviewer/pyproject.toml +18 -0
- agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
- agentforge/templates/docs-qa/.env.example +8 -0
- agentforge/templates/docs-qa/.gitignore +7 -0
- agentforge/templates/docs-qa/README.md +14 -0
- agentforge/templates/docs-qa/agentforge.yaml +19 -0
- agentforge/templates/docs-qa/copier.yml +31 -0
- agentforge/templates/docs-qa/pyproject.toml +18 -0
- agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
- agentforge/templates/minimal/.env.example +11 -0
- agentforge/templates/minimal/.gitignore +10 -0
- agentforge/templates/minimal/README.md +28 -0
- agentforge/templates/minimal/agentforge.yaml +10 -0
- agentforge/templates/minimal/copier.yml +52 -0
- agentforge/templates/minimal/pyproject.toml +18 -0
- agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
- agentforge/templates/patch-bot/.env.example +8 -0
- agentforge/templates/patch-bot/.gitignore +7 -0
- agentforge/templates/patch-bot/README.md +13 -0
- agentforge/templates/patch-bot/agentforge.yaml +15 -0
- agentforge/templates/patch-bot/copier.yml +31 -0
- agentforge/templates/patch-bot/pyproject.toml +18 -0
- agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
- agentforge/templates/research/.env.example +8 -0
- agentforge/templates/research/.gitignore +7 -0
- agentforge/templates/research/README.md +14 -0
- agentforge/templates/research/agentforge.yaml +17 -0
- agentforge/templates/research/copier.yml +31 -0
- agentforge/templates/research/pyproject.toml +18 -0
- agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
- agentforge/templates/triage/.env.example +8 -0
- agentforge/templates/triage/.gitignore +7 -0
- agentforge/templates/triage/README.md +14 -0
- agentforge/templates/triage/agentforge.yaml +25 -0
- agentforge/templates/triage/copier.yml +31 -0
- agentforge/templates/triage/pyproject.toml +18 -0
- agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
- agentforge/testing/__init__.py +69 -0
- agentforge/testing/conformance.py +40 -0
- agentforge/testing/factory.py +89 -0
- agentforge/testing/fixtures.py +42 -0
- agentforge/testing/llm.py +235 -0
- agentforge/testing/recording.py +177 -0
- agentforge/tools/__init__.py +41 -0
- agentforge_py-0.2.1.dist-info/METADATA +158 -0
- agentforge_py-0.2.1.dist-info/RECORD +157 -0
- agentforge_py-0.2.1.dist-info/WHEEL +4 -0
- agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
- agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,472 @@
|
|
|
1
|
+
"""`TreeOfThoughts` — beam-search reasoning with scored branches.
|
|
2
|
+
|
|
3
|
+
Per feat-002 §4.3:
|
|
4
|
+
|
|
5
|
+
GENERATE: one LLM call returns `branch_factor` candidate thoughts
|
|
6
|
+
at the current depth.
|
|
7
|
+
SCORE: each thought is scored 0..1 by the LLM ("self") or by
|
|
8
|
+
a cheap judge model ("judge").
|
|
9
|
+
PRUNE: keep thoughts above `score_threshold`. If `beam_width`
|
|
10
|
+
is set, additionally keep only the top-K.
|
|
11
|
+
EXPAND: recurse on survivors to depth=`depth`.
|
|
12
|
+
SYNTHESIZE: best leaf → final answer.
|
|
13
|
+
|
|
14
|
+
Modern: structured Pydantic schemas for branch generation and
|
|
15
|
+
scoring (no free-form parsing); budget-aware graceful degradation
|
|
16
|
+
(if the next level's estimated cost would exceed the remaining
|
|
17
|
+
budget, the strategy synthesises with what it has rather than
|
|
18
|
+
crashing).
|
|
19
|
+
|
|
20
|
+
`scorer="judge"` in v0.1 falls back to "self" — a separate cheap
|
|
21
|
+
judge model is introduced in feat-006 (`agentforge-eval-geval`).
|
|
22
|
+
The constructor accepts the value so the API surface is locked at
|
|
23
|
+
v0.1; the implementation upgrades transparently when feat-006 lands.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import logging
|
|
30
|
+
from collections.abc import AsyncIterator
|
|
31
|
+
from dataclasses import dataclass, field
|
|
32
|
+
from typing import Literal
|
|
33
|
+
from uuid import uuid4
|
|
34
|
+
|
|
35
|
+
from agentforge_core.observability.tracing import get_tracer
|
|
36
|
+
from agentforge_core.values.chat import StreamingEvent
|
|
37
|
+
from agentforge_core.values.messages import Message
|
|
38
|
+
from agentforge_core.values.state import AgentState
|
|
39
|
+
from pydantic import BaseModel, ConfigDict, Field, ValidationError
|
|
40
|
+
|
|
41
|
+
from agentforge.resolver_register import register_strategy
|
|
42
|
+
from agentforge.runtime import RuntimeContext
|
|
43
|
+
from agentforge.strategies._base import StrategyBase, _events_for_new_steps, get_runtime
|
|
44
|
+
|
|
45
|
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Accepted values for the `scorer` constructor argument of `TreeOfThoughts`.
ScorerKind = Literal["self", "judge"]

# System prompt for the GENERATE phase. It is rendered with
# `str.format(branch_factor=...)` before use, so the literal JSON braces in
# the schema example are doubled (`{{` / `}}`) to survive formatting.
GENERATE_SYSTEM_PROMPT = (
    "You are exploring multiple plausible reasoning paths for a task. "
    "Generate {branch_factor} distinct candidate thoughts. Each thought "
    "should be a different angle, approach, or partial solution. Return "
    "ONLY a JSON object matching this schema (no other text):\n\n"
    ' {{"thoughts": [{{"id": "<unique id>", "content": "<thought text>"}}, ...]}}\n\n'
    "Provide exactly {branch_factor} thoughts."
)

# System prompt for the SCORE phase. It is handed to the LLM verbatim (it is
# never `.format`-ed), which is why its JSON braces are single.
SCORE_SYSTEM_PROMPT = (
    "Score each of the candidate thoughts below from 0.0 (irrelevant / "
    "wrong) to 1.0 (excellent / correct) for how well it advances the "
    "user's task. Return ONLY a JSON object matching this schema (no "
    "other text):\n\n"
    ' {"scores": [{"branch_id": "<id>", "score": <0..1>, "reasoning": "<why>"}, ...]}'
)

# System prompt for the final SYNTHESIZE call made once expansion has stopped.
SYNTHESIZE_SYSTEM_PROMPT = (
    "You have explored multiple reasoning paths and selected the best "
    "one. Produce the final answer based on the path's content; do not "
    "introduce new claims unsupported by the path."
)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ----------------------------------------------------------------------
|
|
74
|
+
# LLM I/O schemas
|
|
75
|
+
# ----------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class _Thought(BaseModel):
    # One candidate thought parsed from the GENERATE response.
    # frozen=True makes instances immutable; strict=True turns off pydantic's
    # lax type coercion during validation.
    model_config = ConfigDict(frozen=True, strict=True)
    # Identifier chosen by the LLM; must be a non-empty string.
    id: str = Field(min_length=1)
    # The thought text; must be a non-empty string.
    content: str = Field(min_length=1)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class _ThoughtList(BaseModel):
    # Top-level envelope of the GENERATE response: {"thoughts": [...]}.
    model_config = ConfigDict(frozen=True, strict=True)
    # At least one thought is required; an empty list fails validation.
    thoughts: list[_Thought] = Field(min_length=1)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class _BranchScore(BaseModel):
    # One entry of the SCORE response, tying a score to a candidate's id.
    model_config = ConfigDict(frozen=True, strict=True)
    # Id of the candidate thought this score refers to.
    branch_id: str
    # Score constrained to the closed interval [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
    # Optional free-text justification; defaults to the empty string.
    reasoning: str = ""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class _BranchScoreList(BaseModel):
    # Top-level envelope of the SCORE response: {"scores": [...]}.
    model_config = ConfigDict(frozen=True, strict=True)
    # May be empty; no minimum length is enforced here.
    scores: list[_BranchScore]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ----------------------------------------------------------------------
|
|
102
|
+
# Internal node + tree
|
|
103
|
+
# ----------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass(slots=True)
class _Node:
    """One thought in the search tree."""

    # Key under which the node is stored in the strategy's `by_id` map.
    id: str
    # `id` of the parent node; None marks the root.
    parent_id: str | None
    # Level in the tree; the root is created with depth 0.
    depth: int
    # Thought text (the root carries the task itself).
    content: str
    # Score assigned to the thought; 0.0 unless set explicitly.
    score: float = 0.0
    # Children appended while this node is being expanded.
    children: list[_Node] = field(default_factory=list)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _path_to_root(leaf: _Node, by_id: dict[str, _Node]) -> list[_Node]:
|
|
119
|
+
path: list[_Node] = []
|
|
120
|
+
cursor: _Node | None = leaf
|
|
121
|
+
while cursor is not None:
|
|
122
|
+
path.append(cursor)
|
|
123
|
+
cursor = by_id.get(cursor.parent_id) if cursor.parent_id else None
|
|
124
|
+
return list(reversed(path))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ----------------------------------------------------------------------
|
|
128
|
+
# Strategy
|
|
129
|
+
# ----------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@register_strategy("tot")
class TreeOfThoughts(StrategyBase):
    """Beam-search reasoning over scored branches.

    Per feat-002 §4.2 the constructor surface is locked at v0.1:

    Args:
        branch_factor: Number of candidate thoughts requested per
            expanded node. Default 3.
        depth: Maximum number of generate/score/prune levels run below
            the root. Default 2.
        score_threshold: Minimum score for a branch to survive
            pruning. Range [0, 1]. Default 0.5.
        scorer: "self" uses the agent's primary LLM to score; "judge"
            (deferred to feat-006) will use a cheap-judge model. Both
            values currently behave identically.
        beam_width: If set, keep at most this many of the highest-
            scoring survivors per level. None = no top-K cap (only
            score_threshold applies). Default None.

    Raises:
        ValueError: If any argument is outside its documented range.
    """

    def __init__(
        self,
        *,
        branch_factor: int = 3,
        depth: int = 2,
        score_threshold: float = 0.5,
        scorer: ScorerKind = "self",
        beam_width: int | None = None,
    ) -> None:
        if branch_factor < 1:
            raise ValueError("branch_factor must be >= 1")
        if depth < 1:
            raise ValueError("depth must be >= 1")
        if not 0.0 <= score_threshold <= 1.0:
            raise ValueError("score_threshold must be in [0, 1]")
        if scorer not in ("self", "judge"):
            raise ValueError(f"scorer must be 'self' or 'judge', got {scorer!r}")
        if beam_width is not None and beam_width < 1:
            raise ValueError("beam_width must be >= 1 when set")
        self._branch_factor = branch_factor
        self._depth = depth
        self._score_threshold = score_threshold
        self._scorer: ScorerKind = scorer
        self._beam_width = beam_width

    async def run(self, state: AgentState) -> AgentState:
        """Expand the tree level by level, then synthesise from the best path.

        Expansion stops when ``depth`` levels have run, when no branch
        survives pruning, or when the next level is estimated not to fit
        in the remaining budget.

        Returns:
            The same ``state`` object that was passed in.
        """
        runtime = get_runtime(state)
        by_id: dict[str, _Node] = {}
        tracer = get_tracer()

        # Root — start from the task itself; no LLM call yet.
        root = _Node(id=str(uuid4()), parent_id=None, depth=0, content=state.task, score=1.0)
        by_id[root.id] = root

        survivors: list[_Node] = [root]
        current_depth = 0

        while current_depth < self._depth and survivors:
            self._check_guardrails(state)
            if not self._next_level_fits_budget(runtime, len(survivors)):
                break
            survivors = await self._iterate_depth_traced(
                tracer, state, by_id, survivors, current_depth
            )
            current_depth += 1

        await self._synthesize_best_path(state, root, by_id, current_depth)
        return state

    async def stream(self, state: AgentState) -> AsyncIterator[StreamingEvent]:
        """Per-depth streaming override (feat-002 v0.3 polish).

        Mirrors :meth:`run` but yields the events produced by
        ``_events_for_new_steps`` for the steps recorded during each
        depth level (flushed after the level completes), then for the
        steps recorded by the final synthesis call, and finally a
        terminal ``done`` event carrying the run id and the spent cost.
        """
        runtime = get_runtime(state)
        by_id: dict[str, _Node] = {}
        tracer = get_tracer()

        root = _Node(id=str(uuid4()), parent_id=None, depth=0, content=state.task, score=1.0)
        by_id[root.id] = root

        survivors: list[_Node] = [root]
        current_depth = 0
        # Index into state.steps up to which events have already been emitted.
        before = len(state.steps)

        while current_depth < self._depth and survivors:
            self._check_guardrails(state)
            if not self._next_level_fits_budget(runtime, len(survivors)):
                break
            survivors = await self._iterate_depth_traced(
                tracer, state, by_id, survivors, current_depth
            )
            # Events are yielded only after the iteration span has closed.
            for ev in _events_for_new_steps(state.steps, before):
                yield ev
            before = len(state.steps)
            current_depth += 1

        await self._synthesize_best_path(state, root, by_id, current_depth)
        for ev in _events_for_new_steps(state.steps, before):
            yield ev

        yield StreamingEvent(
            kind="done",
            content={
                "run_id": state.run_id,
                "cost_usd": float(runtime.budget.spent_usd),
            },
            metadata={},
        )

    # ------------------------------------------------------------------
    # Shared run/stream helpers
    # ------------------------------------------------------------------

    def _next_level_fits_budget(self, runtime: RuntimeContext, n_survivors: int) -> bool:
        """Return True if the next level is affordable; log a warning if not."""
        if self._can_afford_next_level(runtime, n_survivors):
            return True
        # Budget-aware graceful degradation: stop expanding and let the
        # caller synthesise with what has been explored so far.
        log.warning(
            "TreeOfThoughts: estimated next-level cost exceeds "
            "remaining budget; synthesising with current best."
        )
        return False

    async def _iterate_depth_traced(
        self,
        tracer,
        state: AgentState,
        by_id: dict[str, _Node],
        survivors: list[_Node],
        current_depth: int,
    ) -> list[_Node]:
        """Run :meth:`_iterate_depth` inside a ``strategy.iteration`` span."""
        with tracer.start_as_current_span(
            "strategy.iteration",
            attributes={
                "agentforge.iteration": current_depth,
                "agentforge.strategy": "tot",
            },
        ):
            return await self._iterate_depth(state, by_id, survivors, current_depth)

    def _pick_best_node(self, root: _Node, by_id: dict[str, _Node]) -> _Node:
        """Return the best surviving node, or ``root`` if nothing survived.

        The root is excluded from the comparison: it is created with the
        maximum score 1.0 and inserted first, so a plain ``max`` over all
        nodes would always select it and the explored branches would never
        reach synthesis. Only nodes at or above ``score_threshold`` count
        as survivors.
        """
        contenders = [
            node
            for node in by_id.values()
            if node is not root and node.score >= self._score_threshold
        ]
        if not contenders:
            return root
        return max(contenders, key=lambda node: node.score)

    async def _synthesize_best_path(
        self,
        state: AgentState,
        root: _Node,
        by_id: dict[str, _Node],
        current_depth: int,
    ) -> None:
        """SYNTHESIZE the final answer from the best path found."""
        best = self._pick_best_node(root, by_id)
        path = _path_to_root(best, by_id)
        path_text = "\n".join(f" depth={n.depth} score={n.score:.2f}: {n.content}" for n in path)

        await self._call_llm(
            state,
            iteration=current_depth + 1,
            system=SYNTHESIZE_SYSTEM_PROMPT,
            messages=[
                Message(role="user", content=state.task),
                Message(
                    role="assistant",
                    content=f"Best path explored:\n{path_text}",
                ),
            ],
            kind="synthesize",
        )

    @staticmethod
    def _unique_node_id(proposed: str, by_id: dict[str, _Node]) -> str:
        """Return ``proposed`` if unused in ``by_id``, else a suffixed variant.

        Thought ids are chosen by the LLM and are often reused across
        levels (for example "1", "2", "3"). Using them directly as keys
        would overwrite earlier nodes and corrupt parent links.
        """
        candidate = proposed
        while candidate in by_id:
            candidate = f"{proposed}~{uuid4().hex[:8]}"
        return candidate

    # ------------------------------------------------------------------
    # Phase helpers
    # ------------------------------------------------------------------

    async def _iterate_depth(
        self,
        state: AgentState,
        by_id: dict[str, _Node],
        survivors: list[_Node],
        current_depth: int,
    ) -> list[_Node]:
        """Run one depth-level of generate/score/prune.

        Returns the survivors of this level after threshold filtering
        and (optional) beam-width truncation, applied first per parent
        and then across all parents. Records one ``branch`` step per
        generated child and registers every child in ``by_id``.
        """
        new_survivors: list[_Node] = []
        for parent in survivors:
            self._check_guardrails(state)

            # GENERATE candidates for this parent
            candidates = await self._generate(state, parent, current_depth)
            if not candidates:
                continue

            # SCORE candidates
            scored = await self._score(state, candidates, current_depth)

            # Build child nodes + record branch step
            children: list[_Node] = []
            for thought, score in scored:
                child = _Node(
                    id=self._unique_node_id(thought.id, by_id),
                    parent_id=parent.id,
                    depth=current_depth + 1,
                    content=thought.content,
                    score=score,
                )
                parent.children.append(child)
                by_id[child.id] = child
                children.append(child)
                self._record_step(
                    state,
                    iteration=current_depth + 1,
                    kind="branch",
                    content={
                        "branch_id": child.id,
                        "parent_id": parent.id,
                        "score": score,
                        "thought": thought.content,
                    },
                )

            # PRUNE: above threshold; optionally top-K (beam_width).
            # Work from the nodes just built rather than re-reading by_id,
            # so duplicate ids cannot alias one another.
            kept = [child for child in children if child.score >= self._score_threshold]
            kept.sort(key=lambda n: n.score, reverse=True)
            if self._beam_width is not None:
                kept = kept[: self._beam_width]
            new_survivors.extend(kept)

        # Across all parents at this level, also bound the global
        # beam if set.
        new_survivors.sort(key=lambda n: n.score, reverse=True)
        if self._beam_width is not None:
            new_survivors = new_survivors[: self._beam_width]
        return new_survivors

    async def _generate(
        self, state: AgentState, parent: _Node, current_depth: int
    ) -> list[_Thought]:
        """Generate `branch_factor` candidate thoughts as children of `parent`.

        Returns an empty list (after logging a warning) when the LLM
        response cannot be parsed into the expected schema.
        """
        prompt = GENERATE_SYSTEM_PROMPT.format(branch_factor=self._branch_factor)
        messages: list[Message] = [
            Message(role="user", content=state.task),
        ]
        # The root's content is the task itself, already sent above; only
        # deeper parents contribute a "prior thought" message.
        if parent.depth > 0:
            messages.append(
                Message(
                    role="assistant",
                    content=(
                        f"Building on prior thought (score {parent.score:.2f}): {parent.content}"
                    ),
                )
            )
        response = await self._call_llm(
            state,
            iteration=current_depth + 1,
            system=prompt,
            messages=messages,
            kind="think",
        )
        try:
            thought_list = _ThoughtList.model_validate_json(_strip_code_fences(response.content))
        except (ValidationError, json.JSONDecodeError, ValueError) as exc:
            log.warning(
                "TreeOfThoughts: candidate generation parse failed at depth %d: %s",
                current_depth,
                exc,
            )
            return []
        return list(thought_list.thoughts)

    async def _score(
        self,
        state: AgentState,
        candidates: list[_Thought],
        current_depth: int,
    ) -> list[tuple[_Thought, float]]:
        """Score each candidate; returns list of (thought, score).

        The result has one entry per candidate, in the input order. If
        the LLM response cannot be parsed, every candidate receives the
        neutral score 0.5.
        """
        # scorer="judge" deferred to feat-006; falls back to "self" for now.
        candidate_text = "\n".join(f"- {t.id}: {t.content}" for t in candidates)
        messages: list[Message] = [
            Message(role="user", content=state.task),
            Message(role="assistant", content=f"Candidate thoughts:\n{candidate_text}"),
        ]
        response = await self._call_llm(
            state,
            iteration=current_depth + 1,
            system=SCORE_SYSTEM_PROMPT,
            messages=messages,
            kind="think",
        )
        try:
            score_list = _BranchScoreList.model_validate_json(_strip_code_fences(response.content))
        except (ValidationError, json.JSONDecodeError, ValueError) as exc:
            log.warning(
                "TreeOfThoughts: scoring parse failed at depth %d: %s. "
                "Defaulting all candidates to neutral score 0.5.",
                current_depth,
                exc,
            )
            return [(t, 0.5) for t in candidates]

        score_by_id = {s.branch_id: s.score for s in score_list.scores}
        # Any candidate the LLM didn't score defaults to 0.0, so it is
        # pruned unless score_threshold is itself 0.
        return [(t, score_by_id.get(t.id, 0.0)) for t in candidates]

    def _can_afford_next_level(self, runtime: RuntimeContext, n_survivors: int) -> bool:
        """Estimate next-level cost; return True if it fits in remaining budget.

        Estimate: each survivor needs (1 generate call + 1 score call).
        The per-call cost is taken as `spent_usd / iteration` so far; if
        `iteration` is still zero, a small nonzero cost (0.001) is assumed
        instead. No floor is applied once `iteration` is nonzero.
        """
        budget = runtime.budget
        avg = budget.spent_usd / max(1, budget.iteration) if budget.iteration else 0.001
        estimated = n_survivors * 2 * avg
        return bool(estimated <= budget.remaining_usd())
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _strip_code_fences(content: str) -> str:
|
|
459
|
+
"""Strip ```json ... ``` fences if the LLM wrapped the JSON in them."""
|
|
460
|
+
text = content.strip()
|
|
461
|
+
if text.startswith("```"):
|
|
462
|
+
first_newline = text.find("\n")
|
|
463
|
+
if first_newline == -1:
|
|
464
|
+
return text
|
|
465
|
+
body = text[first_newline + 1 :]
|
|
466
|
+
if body.endswith("```"):
|
|
467
|
+
body = body[: -len("```")]
|
|
468
|
+
return body.strip()
|
|
469
|
+
return text
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
__all__ = ["TreeOfThoughts"]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
See AGENTS.md for project conventions. AGENTS.md is the canonical
|
|
2
|
+
source. This file exists so Cursor's native .cursorrules discovery
|
|
3
|
+
keeps working.
|
|
4
|
+
|
|
5
|
+
Cursor-specific rules can be added below this marker:
|
|
6
|
+
|
|
7
|
+
<!-- agentforge:end-managed -->
|
|
8
|
+
|
|
9
|
+
<!-- agentforge:custom -->
|
|
10
|
+
<!-- Cursor-specific rules go here. This section survives
|
|
11
|
+
`agentforge upgrade`. -->
|
|
12
|
+
<!-- agentforge:end-custom -->
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
See [AGENTS.md](../AGENTS.md) for project conventions. AGENTS.md
|
|
2
|
+
is the canonical source for AI-assistant instructions. This file
|
|
3
|
+
exists so GitHub Copilot's native
|
|
4
|
+
`.github/copilot-instructions.md` discovery keeps working.
|
|
5
|
+
|
|
6
|
+
Copilot-specific instructions can be added below this marker:
|
|
7
|
+
|
|
8
|
+
<!-- agentforge:end-managed -->
|
|
9
|
+
|
|
10
|
+
<!-- agentforge:custom -->
|
|
11
|
+
<!-- Copilot-specific instructions go here. This section survives
|
|
12
|
+
`agentforge upgrade`. -->
|
|
13
|
+
<!-- agentforge:end-custom -->
|
|
File without changes
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# AgentForge agent — AI assistant instructions
|
|
2
|
+
|
|
3
|
+
This project is built on **AgentForge {{ framework_version }}**. Use
|
|
4
|
+
these rules when suggesting changes. This file is read by Claude
|
|
5
|
+
Code, Cursor, Aider, and any tool following the
|
|
6
|
+
[AGENTS.md convention](https://agents.md).
|
|
7
|
+
|
|
8
|
+
## Project shape (you must respect this)
|
|
9
|
+
|
|
10
|
+
- Framework version: `{{ framework_version }}`
|
|
11
|
+
- Template: `{{ template_name }}`
|
|
12
|
+
- Project slug: `{{ project_slug }}`
|
|
13
|
+
- LLM provider: `{{ llm_provider }}`
|
|
14
|
+
|
|
15
|
+
## File ownership rules
|
|
16
|
+
|
|
17
|
+
- Files starting with `AGENTFORGE-MANAGED:` are owned by the
|
|
18
|
+
framework. Do not edit. Suggest changes to YAML config or
|
|
19
|
+
developer-owned files instead.
|
|
20
|
+
- Files starting with `AGENTFORGE-FORKED:` were customised by the
|
|
21
|
+
developer. Edit normally; do not restore the marker.
|
|
22
|
+
- Files with no marker are developer-owned. Edit normally.
|
|
23
|
+
- Markdown documents may carry a `<!-- agentforge:end-managed -->`
|
|
24
|
+
marker. Everything above belongs to the framework; everything
|
|
25
|
+
below is the developer's custom section and survives upgrades.
|
|
26
|
+
|
|
27
|
+
## Architecture invariants
|
|
28
|
+
|
|
29
|
+
- **Tools** — use the `@tool` decorator on a typed function, OR
|
|
30
|
+
subclass `Tool`. Type hints drive the input schema; do not
|
|
31
|
+
hand-write JSON schemas.
|
|
32
|
+
- **Reasoning loop** — do not edit. Configure via
|
|
33
|
+
`agentforge.yaml > agent.strategy`.
|
|
34
|
+
- **LLM clients** — do not import vendor SDKs directly. Use
|
|
35
|
+
`agent.providers["..."]` or pass `model="<provider>:<model_id>"`.
|
|
36
|
+
- **Memory** — do not write SQL directly. Use `agent.memory.put`
|
|
37
|
+
/ `.get` / `.query`.
|
|
38
|
+
- **Costs** — do not bypass `BudgetPolicy`. Every LLM call is
|
|
39
|
+
checked. Bypassing it is a P3 (cost-safety) violation.
|
|
40
|
+
- **Run id** — do not invent your own correlation id. Use
|
|
41
|
+
`current_run().run_id` from `agentforge_core.production.run_context`.
|
|
42
|
+
- **Guardrails** — input / output / tool-gate validators live in
|
|
43
|
+
`agentforge.guardrails`. Custom validators implement the locked
|
|
44
|
+
ABCs in `agentforge_core.contracts.guardrails`.
|
|
45
|
+
|
|
46
|
+
## How to add common things
|
|
47
|
+
|
|
48
|
+
Open the relevant runbook for the full step-by-step instructions. Each runbook lives at
|
|
49
|
+
`docs/runbooks/NN-<topic>.md` and is upgrade-safe (the framework
|
|
50
|
+
maintains the managed section).
|
|
51
|
+
|
|
52
|
+
| Task | Runbook |
|
|
53
|
+
|---|---|
|
|
54
|
+
| Set up a fresh agent | `docs/runbooks/01-set-up-new-agent.md` |
|
|
55
|
+
| Add a tool | `docs/runbooks/02-add-a-tool.md` |
|
|
56
|
+
| Add a pipeline task | `docs/runbooks/03-add-a-pipeline-task.md` |
|
|
57
|
+
| Pick a reasoning strategy | `docs/runbooks/04-pick-reasoning-strategy.md` |
|
|
58
|
+
| Write prompts | `docs/runbooks/05-write-prompts.md` |
|
|
59
|
+
| Test your agent | `docs/runbooks/06-test-your-agent.md` |
|
|
60
|
+
| Debug a run | `docs/runbooks/07-debug-a-run.md` |
|
|
61
|
+
| Add memory / persistence | `docs/runbooks/08-add-memory.md` |
|
|
62
|
+
| Add MCP servers | `docs/runbooks/09-add-mcp.md` |
|
|
63
|
+
| Add evaluators | `docs/runbooks/10-add-evaluators.md` |
|
|
64
|
+
| Add safety guardrails | `docs/runbooks/11-add-safety-guardrails.md` |
|
|
65
|
+
| Add observability | `docs/runbooks/12-add-observability.md` |
|
|
66
|
+
| Configure multi-provider | `docs/runbooks/13-configure-multi-provider.md` |
|
|
67
|
+
| Deploy your agent | `docs/runbooks/14-deploy-your-agent.md` |
|
|
68
|
+
| Upgrade your agent | `docs/runbooks/15-upgrade-your-agent.md` |
|
|
69
|
+
| Configuration reference | `docs/runbooks/16-configuration-reference.md` |
|
|
70
|
+
| Add a reranker | `docs/runbooks/17-add-reranker.md` |
|
|
71
|
+
| Add hybrid search (BM25 + vector) | `docs/runbooks/18-add-hybrid-search.md` |
|
|
72
|
+
| Add GraphRAG (graph-augmented retrieval) | `docs/runbooks/19-add-graphrag.md` |
|
|
73
|
+
| Apply schema migrations | `docs/runbooks/20-apply-schema-migrations.md` |
|
|
74
|
+
| Use streaming guardrails (sentence-window) | `docs/runbooks/21-use-streaming-guardrails.md` |
|
|
75
|
+
|
|
76
|
+
## Anti-patterns (do not suggest these)
|
|
77
|
+
|
|
78
|
+
- **LangChain idioms** (`LCEL`, `Runnable`, `RunnablePassthrough`)
|
|
79
|
+
— wrong framework. Use AgentForge's `Agent` + `@tool` + strategy
|
|
80
|
+
composition instead.
|
|
81
|
+
- **Hand-rolling JSON schemas for tools** — use type hints on a
|
|
82
|
+
`@tool`-decorated function and the schema is derived.
|
|
83
|
+
- **Storing API keys in `agentforge.yaml` literals** — use
|
|
84
|
+
`${ENV_VAR}` interpolation; secrets live in `.env` (gitignored).
|
|
85
|
+
- **Catching exceptions inside tool code to "make the agent more
|
|
86
|
+
robust"** — let them surface; the framework records them as
|
|
87
|
+
observations and the LLM recovers naturally.
|
|
88
|
+
- **Adding a wrapper around `Agent.run()` to add logging** — the
|
|
89
|
+
framework already logs; install a custom hook instead (see runbook 12).
|
|
90
|
+
- **Writing SQL directly against the memory backend** — use
|
|
91
|
+
`agent.memory.put / .get / .query`; SQL bypasses the contract
|
|
92
|
+
and breaks driver swaps.
|
|
93
|
+
- **Running expensive code outside `BudgetPolicy`** — every
|
|
94
|
+
external call (LLM, tool, retriever) should respect the run's
|
|
95
|
+
budget.
|
|
96
|
+
|
|
97
|
+
## Pre-commit checks (run these before suggesting a commit)
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
agentforge config validate
|
|
101
|
+
agentforge status # no managed file should have drifted
|
|
102
|
+
pytest -q
|
|
103
|
+
ruff check
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Where to find the framework's truth
|
|
107
|
+
|
|
108
|
+
- Locked ABCs: `agentforge_core/contracts/*.py`
|
|
109
|
+
- Default config schema: `agentforge_core/config/schema.py`
|
|
110
|
+
- Built-in evaluators / guardrails / tools: `agentforge/*/`
|
|
111
|
+
- Spec for any feature: `docs/features/feat-NNN-*.md` in the
|
|
112
|
+
framework repo
|
|
113
|
+
|
|
114
|
+
When in doubt, prefer the framework's locked types over inventing
|
|
115
|
+
new shapes. Suggesting an `Agent` subclass or a new ABC is almost
|
|
116
|
+
always wrong — the framework is composed from named modules.
|
|
117
|
+
|
|
118
|
+
<!-- agentforge:end-managed -->
|
|
119
|
+
|
|
120
|
+
<!-- agentforge:custom -->
|
|
121
|
+
<!-- Project-specific AI assistant instructions go here. This
|
|
122
|
+
section survives `agentforge upgrade`. -->
|
|
123
|
+
<!-- agentforge:end-custom -->
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
> See [AGENTS.md](./AGENTS.md) for AgentForge conventions. Claude
|
|
2
|
+
> Code reads both files; **AGENTS.md is the canonical source**.
|
|
3
|
+
|
|
4
|
+
This file exists so Claude Code's native `CLAUDE.md` discovery
|
|
5
|
+
keeps working. Put Claude-Code-specific instructions in the custom
|
|
6
|
+
section below.
|
|
7
|
+
|
|
8
|
+
<!-- agentforge:end-managed -->
|
|
9
|
+
|
|
10
|
+
<!-- agentforge:custom -->
|
|
11
|
+
<!-- Claude Code-specific instructions go here. This section
|
|
12
|
+
survives `agentforge upgrade`. -->
|
|
13
|
+
<!-- agentforge:end-custom -->
|