agentforge-py 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. agentforge/__init__.py +114 -0
  2. agentforge/_testing/__init__.py +19 -0
  3. agentforge/_testing/fake_llm.py +126 -0
  4. agentforge/_testing/fake_tool.py +122 -0
  5. agentforge/_tools/__init__.py +14 -0
  6. agentforge/_tools/calculator.py +102 -0
  7. agentforge/_tools/decorator.py +300 -0
  8. agentforge/_tools/file_read.py +112 -0
  9. agentforge/_tools/shell.py +134 -0
  10. agentforge/_tools/web_search.py +207 -0
  11. agentforge/agent.py +817 -0
  12. agentforge/auth.py +42 -0
  13. agentforge/cli/__init__.py +18 -0
  14. agentforge/cli/_build.py +323 -0
  15. agentforge/cli/_scaffold_state.py +250 -0
  16. agentforge/cli/_shared_scaffold.py +174 -0
  17. agentforge/cli/config_cmd.py +174 -0
  18. agentforge/cli/db_cmd.py +262 -0
  19. agentforge/cli/debug_cmd.py +168 -0
  20. agentforge/cli/docs_cmd.py +217 -0
  21. agentforge/cli/eval_cmd.py +181 -0
  22. agentforge/cli/health_cmd.py +139 -0
  23. agentforge/cli/list_modules.py +85 -0
  24. agentforge/cli/main.py +81 -0
  25. agentforge/cli/manifest_apply.py +368 -0
  26. agentforge/cli/module_cmd.py +247 -0
  27. agentforge/cli/new_cmd.py +171 -0
  28. agentforge/cli/run_cmd.py +234 -0
  29. agentforge/cli/upgrade_cmd.py +230 -0
  30. agentforge/config/__init__.py +45 -0
  31. agentforge/eval/__init__.py +18 -0
  32. agentforge/eval/consistency.py +107 -0
  33. agentforge/eval/coverage.py +100 -0
  34. agentforge/eval/format_compliance.py +107 -0
  35. agentforge/eval/regression.py +143 -0
  36. agentforge/findings.py +166 -0
  37. agentforge/guardrails/__init__.py +32 -0
  38. agentforge/guardrails/allowlist.py +49 -0
  39. agentforge/guardrails/capability_check.py +58 -0
  40. agentforge/guardrails/engine.py +289 -0
  41. agentforge/guardrails/pii_redact_basic.py +61 -0
  42. agentforge/guardrails/prompt_injection_basic.py +90 -0
  43. agentforge/memory/__init__.py +16 -0
  44. agentforge/memory/in_memory.py +130 -0
  45. agentforge/memory/in_memory_graph.py +262 -0
  46. agentforge/memory/in_memory_vector.py +167 -0
  47. agentforge/pipeline/__init__.py +26 -0
  48. agentforge/pipeline/engine.py +189 -0
  49. agentforge/pipeline/errors.py +19 -0
  50. agentforge/pipeline/tool.py +93 -0
  51. agentforge/py.typed +0 -0
  52. agentforge/recording.py +189 -0
  53. agentforge/renderers/__init__.py +28 -0
  54. agentforge/renderers/_defaults.py +32 -0
  55. agentforge/renderers/markdown.py +44 -0
  56. agentforge/renderers/patch_applier.py +46 -0
  57. agentforge/renderers/registry.py +108 -0
  58. agentforge/renderers/scorecard.py +59 -0
  59. agentforge/renderers/span_table.py +71 -0
  60. agentforge/replay.py +260 -0
  61. agentforge/resolver_register.py +41 -0
  62. agentforge/retrieval.py +410 -0
  63. agentforge/runtime.py +63 -0
  64. agentforge/strategies/__init__.py +27 -0
  65. agentforge/strategies/_base.py +280 -0
  66. agentforge/strategies/_plan.py +93 -0
  67. agentforge/strategies/multi_agent.py +541 -0
  68. agentforge/strategies/plan_execute.py +506 -0
  69. agentforge/strategies/react.py +237 -0
  70. agentforge/strategies/tot.py +472 -0
  71. agentforge/templates/_shared/.cursorrules +12 -0
  72. agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
  73. agentforge/templates/_shared/.gitkeep +0 -0
  74. agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
  75. agentforge/templates/_shared/CLAUDE.md +13 -0
  76. agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
  77. agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
  78. agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
  79. agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
  80. agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
  81. agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
  82. agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
  83. agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
  84. agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
  85. agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
  86. agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
  87. agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
  88. agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
  89. agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
  90. agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
  91. agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
  92. agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
  93. agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
  94. agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
  95. agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
  96. agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
  97. agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
  98. agentforge/templates/code-reviewer/.env.example +8 -0
  99. agentforge/templates/code-reviewer/.gitignore +7 -0
  100. agentforge/templates/code-reviewer/README.md +12 -0
  101. agentforge/templates/code-reviewer/agentforge.yaml +23 -0
  102. agentforge/templates/code-reviewer/copier.yml +34 -0
  103. agentforge/templates/code-reviewer/pyproject.toml +18 -0
  104. agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  105. agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  106. agentforge/templates/docs-qa/.env.example +8 -0
  107. agentforge/templates/docs-qa/.gitignore +7 -0
  108. agentforge/templates/docs-qa/README.md +14 -0
  109. agentforge/templates/docs-qa/agentforge.yaml +19 -0
  110. agentforge/templates/docs-qa/copier.yml +31 -0
  111. agentforge/templates/docs-qa/pyproject.toml +18 -0
  112. agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  113. agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  114. agentforge/templates/minimal/.env.example +11 -0
  115. agentforge/templates/minimal/.gitignore +10 -0
  116. agentforge/templates/minimal/README.md +28 -0
  117. agentforge/templates/minimal/agentforge.yaml +10 -0
  118. agentforge/templates/minimal/copier.yml +52 -0
  119. agentforge/templates/minimal/pyproject.toml +18 -0
  120. agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  121. agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
  122. agentforge/templates/patch-bot/.env.example +8 -0
  123. agentforge/templates/patch-bot/.gitignore +7 -0
  124. agentforge/templates/patch-bot/README.md +13 -0
  125. agentforge/templates/patch-bot/agentforge.yaml +15 -0
  126. agentforge/templates/patch-bot/copier.yml +31 -0
  127. agentforge/templates/patch-bot/pyproject.toml +18 -0
  128. agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  129. agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  130. agentforge/templates/research/.env.example +8 -0
  131. agentforge/templates/research/.gitignore +7 -0
  132. agentforge/templates/research/README.md +14 -0
  133. agentforge/templates/research/agentforge.yaml +17 -0
  134. agentforge/templates/research/copier.yml +31 -0
  135. agentforge/templates/research/pyproject.toml +18 -0
  136. agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  137. agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
  138. agentforge/templates/triage/.env.example +8 -0
  139. agentforge/templates/triage/.gitignore +7 -0
  140. agentforge/templates/triage/README.md +14 -0
  141. agentforge/templates/triage/agentforge.yaml +25 -0
  142. agentforge/templates/triage/copier.yml +31 -0
  143. agentforge/templates/triage/pyproject.toml +18 -0
  144. agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  145. agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
  146. agentforge/testing/__init__.py +69 -0
  147. agentforge/testing/conformance.py +40 -0
  148. agentforge/testing/factory.py +89 -0
  149. agentforge/testing/fixtures.py +42 -0
  150. agentforge/testing/llm.py +235 -0
  151. agentforge/testing/recording.py +177 -0
  152. agentforge/tools/__init__.py +41 -0
  153. agentforge_py-0.2.1.dist-info/METADATA +158 -0
  154. agentforge_py-0.2.1.dist-info/RECORD +157 -0
  155. agentforge_py-0.2.1.dist-info/WHEEL +4 -0
  156. agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
  157. agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,472 @@
1
+ """`TreeOfThoughts` — beam-search reasoning with scored branches.
2
+
3
+ Per feat-002 §4.3:
4
+
5
+ GENERATE: one LLM call returns `branch_factor` candidate thoughts
6
+ at the current depth.
7
+ SCORE: each thought is scored 0..1 by the LLM ("self") or by
8
+ a cheap judge model ("judge").
9
+ PRUNE: keep thoughts at or above `score_threshold`. If `beam_width`
10
+ is set, additionally keep only the top-K.
11
+ EXPAND: recurse on survivors to depth=`depth`.
12
+ SYNTHESIZE: best leaf → final answer.
13
+
14
+ Modern: structured Pydantic schemas for branch generation and
15
+ scoring (no free-form parsing); budget-aware graceful degradation
16
+ (if the next level's estimated cost would exceed the remaining
17
+ budget, the strategy synthesises with what it has rather than
18
+ crashing).
19
+
20
+ `scorer="judge"` in v0.1 falls back to "self" — a separate cheap
21
+ judge model is introduced in feat-006 (`agentforge-eval-geval`).
22
+ The constructor accepts the value so the API surface is locked at
23
+ v0.1; the implementation upgrades transparently when feat-006 lands.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import logging
30
+ from collections.abc import AsyncIterator
31
+ from dataclasses import dataclass, field
32
+ from typing import Literal
33
+ from uuid import uuid4
34
+
35
+ from agentforge_core.observability.tracing import get_tracer
36
+ from agentforge_core.values.chat import StreamingEvent
37
+ from agentforge_core.values.messages import Message
38
+ from agentforge_core.values.state import AgentState
39
+ from pydantic import BaseModel, ConfigDict, Field, ValidationError
40
+
41
+ from agentforge.resolver_register import register_strategy
42
+ from agentforge.runtime import RuntimeContext
43
+ from agentforge.strategies._base import StrategyBase, _events_for_new_steps, get_runtime
44
+
45
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# The two values accepted by the `scorer` constructor argument.
ScorerKind = Literal["self", "judge"]

# System prompt for the GENERATE phase. It is rendered with
# `str.format(branch_factor=...)` before use, which is why the literal
# JSON braces in the schema example are doubled (`{{` / `}}`).
GENERATE_SYSTEM_PROMPT = (
    "You are exploring multiple plausible reasoning paths for a task. "
    "Generate {branch_factor} distinct candidate thoughts. Each thought "
    "should be a different angle, approach, or partial solution. Return "
    "ONLY a JSON object matching this schema (no other text):\n\n"
    ' {{"thoughts": [{{"id": "<unique id>", "content": "<thought text>"}}, ...]}}\n\n'
    "Provide exactly {branch_factor} thoughts."
)

# System prompt for the SCORE phase. It is passed to the LLM verbatim
# (never run through `str.format`), so the JSON braces are single here.
SCORE_SYSTEM_PROMPT = (
    "Score each of the candidate thoughts below from 0.0 (irrelevant / "
    "wrong) to 1.0 (excellent / correct) for how well it advances the "
    "user's task. Return ONLY a JSON object matching this schema (no "
    "other text):\n\n"
    ' {"scores": [{"branch_id": "<id>", "score": <0..1>, "reasoning": "<why>"}, ...]}'
)

# System prompt for the final SYNTHESIZE call; also passed verbatim.
SYNTHESIZE_SYSTEM_PROMPT = (
    "You have explored multiple reasoning paths and selected the best "
    "one. Produce the final answer based on the path's content; do not "
    "introduce new claims unsupported by the path."
)
71
+
72
+
73
+ # ----------------------------------------------------------------------
74
+ # LLM I/O schemas
75
+ # ----------------------------------------------------------------------
76
+
77
+
78
class _Thought(BaseModel):
    # One candidate thought parsed from the GENERATE response.
    # The model is configured frozen and strict (see pydantic ConfigDict).
    model_config = ConfigDict(frozen=True, strict=True)
    # Identifier supplied by the LLM; must be non-empty. Nothing in this
    # schema enforces uniqueness across thoughts.
    id: str = Field(min_length=1)
    # The thought text; must be non-empty.
    content: str = Field(min_length=1)
82
+
83
+
84
class _ThoughtList(BaseModel):
    # Top-level shape of the GENERATE response: {"thoughts": [...]}.
    model_config = ConfigDict(frozen=True, strict=True)
    # At least one thought is required; an empty list fails validation.
    thoughts: list[_Thought] = Field(min_length=1)
87
+
88
+
89
class _BranchScore(BaseModel):
    # One entry of the SCORE response, tying a score to a thought id.
    model_config = ConfigDict(frozen=True, strict=True)
    # Id of the thought being scored, as echoed back by the LLM.
    branch_id: str
    # Score constrained to the closed interval [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
    # Optional free-text justification; defaults to the empty string.
    reasoning: str = ""
94
+
95
+
96
class _BranchScoreList(BaseModel):
    # Top-level shape of the SCORE response: {"scores": [...]}.
    model_config = ConfigDict(frozen=True, strict=True)
    # May be empty or partial; no minimum length is declared here.
    scores: list[_BranchScore]
99
+
100
+
101
+ # ----------------------------------------------------------------------
102
+ # Internal node + tree
103
+ # ----------------------------------------------------------------------
104
+
105
+
106
@dataclass(slots=True)
class _Node:
    """One thought in the search tree."""

    # Key under which the node is stored in the tree's id -> node mapping.
    id: str
    # Id of the parent node; None marks the root.
    parent_id: str | None
    # Distance from the root (the root is created with depth=0).
    depth: int
    # Thought text (for the root, the task itself).
    content: str
    # Score assigned to this thought; defaults to 0.0.
    score: float = 0.0
    # Child nodes appended as this node is expanded.
    children: list[_Node] = field(default_factory=list)
116
+
117
+
118
def _path_to_root(leaf: _Node, by_id: dict[str, _Node]) -> list[_Node]:
    """Return the chain of nodes from the root down to `leaf`.

    Walks `parent_id` links upward through `by_id`, starting at `leaf`,
    and returns the visited nodes in root-first order.

    Args:
        leaf: Node to start from; it is always the last element of the
            returned list.
        by_id: Mapping from node id to node, used to resolve parents.

    Returns:
        The nodes from the topmost reachable ancestor to `leaf`. The walk
        stops at a node whose `parent_id` is empty/None, whose parent is
        missing from `by_id`, or whose id has already been visited.
    """
    path: list[_Node] = []
    # Node ids can originate from LLM output, so a child may share an id
    # with one of its ancestors. Tracking visited ids guarantees the walk
    # terminates instead of looping forever on such a cycle.
    visited: set[str] = set()
    cursor: _Node | None = leaf
    while cursor is not None and cursor.id not in visited:
        visited.add(cursor.id)
        path.append(cursor)
        cursor = by_id.get(cursor.parent_id) if cursor.parent_id else None
    path.reverse()
    return path
125
+
126
+
127
+ # ----------------------------------------------------------------------
128
+ # Strategy
129
+ # ----------------------------------------------------------------------
130
+
131
+
132
@register_strategy("tot")
class TreeOfThoughts(StrategyBase):
    """Beam-search reasoning over scored branches.

    Per feat-002 §4.2 the constructor surface is locked at v0.1:

    Args:
        branch_factor: Number of candidate thoughts requested per
            expanded node. Default 3.
        depth: Maximum number of expansion levels below the root. The
            root is the task itself and costs no LLM call. Default 2.
        score_threshold: Minimum score (inclusive) for a branch to
            survive pruning. Range [0, 1]. Default 0.5.
        scorer: "self" uses the agent's primary LLM to score; "judge"
            (deferred to feat-006) will use a cheap-judge model. Both
            values currently behave identically.
        beam_width: If set, keep at most this many of the highest-
            scoring survivors per level. None = no top-K cap (only
            score_threshold applies). Default None.

    Raises:
        ValueError: If any argument is outside its documented range.
    """

    def __init__(
        self,
        *,
        branch_factor: int = 3,
        depth: int = 2,
        score_threshold: float = 0.5,
        scorer: ScorerKind = "self",
        beam_width: int | None = None,
    ) -> None:
        if branch_factor < 1:
            raise ValueError("branch_factor must be >= 1")
        if depth < 1:
            raise ValueError("depth must be >= 1")
        if not 0.0 <= score_threshold <= 1.0:
            raise ValueError("score_threshold must be in [0, 1]")
        if scorer not in ("self", "judge"):
            raise ValueError(f"scorer must be 'self' or 'judge', got {scorer!r}")
        if beam_width is not None and beam_width < 1:
            raise ValueError("beam_width must be >= 1 when set")
        self._branch_factor = branch_factor
        self._depth = depth
        self._score_threshold = score_threshold
        self._scorer: ScorerKind = scorer
        self._beam_width = beam_width

    async def run(self, state: AgentState) -> AgentState:
        """Explore the thought tree, then synthesise the final answer.

        Expands up to `depth` levels (stopping early when nothing
        survives pruning or the budget estimate says the next level is
        unaffordable), then issues one SYNTHESIZE call based on the best
        explored path.

        Returns:
            The same `state` object that was passed in.
        """
        runtime = get_runtime(state)
        by_id: dict[str, _Node] = {}
        tracer = get_tracer()

        # Root — start from the task itself; no LLM call yet.
        root = _Node(id=str(uuid4()), parent_id=None, depth=0, content=state.task, score=1.0)
        by_id[root.id] = root

        survivors: list[_Node] = [root]
        current_depth = 0

        while current_depth < self._depth and survivors:
            self._check_guardrails(state)

            # Budget-aware graceful degradation: estimate the cost of
            # the next level (branch + score per survivor); if it would
            # exceed remaining budget, stop expanding and synthesise.
            if not self._can_afford_next_level(runtime, len(survivors)):
                log.warning(
                    "TreeOfThoughts: estimated next-level cost exceeds "
                    "remaining budget; synthesising with current best."
                )
                break

            with tracer.start_as_current_span(
                "strategy.iteration",
                attributes={
                    "agentforge.iteration": current_depth,
                    "agentforge.strategy": "tot",
                },
            ):
                survivors = await self._iterate_depth(state, by_id, survivors, current_depth)
            current_depth += 1

        # SYNTHESIZE the final answer from the best path.
        await self._synthesize(state, root, by_id, current_depth)
        return state

    async def stream(self, state: AgentState) -> AsyncIterator[StreamingEvent]:
        """Per-depth streaming override (feat-002 v0.3 polish).

        Mirrors :meth:`run` but yields a ``step`` `StreamingEvent`
        for each step recorded inside ``_iterate_depth`` (flushed after
        the helper returns), plus the events for the final
        ``synthesize`` call and the terminal ``done`` event.
        """
        runtime = get_runtime(state)
        by_id: dict[str, _Node] = {}
        tracer = get_tracer()

        root = _Node(id=str(uuid4()), parent_id=None, depth=0, content=state.task, score=1.0)
        by_id[root.id] = root

        survivors: list[_Node] = [root]
        current_depth = 0
        # Index into state.steps up to which events were already emitted.
        before = len(state.steps)

        while current_depth < self._depth and survivors:
            self._check_guardrails(state)

            if not self._can_afford_next_level(runtime, len(survivors)):
                log.warning(
                    "TreeOfThoughts: estimated next-level cost exceeds "
                    "remaining budget; synthesising with current best."
                )
                break

            with tracer.start_as_current_span(
                "strategy.iteration",
                attributes={
                    "agentforge.iteration": current_depth,
                    "agentforge.strategy": "tot",
                },
            ):
                survivors = await self._iterate_depth(state, by_id, survivors, current_depth)
            # Flush after the span has closed so no tracing context is
            # held open across a generator suspension point.
            for ev in _events_for_new_steps(state.steps, before):
                yield ev
            before = len(state.steps)
            current_depth += 1

        await self._synthesize(state, root, by_id, current_depth)
        for ev in _events_for_new_steps(state.steps, before):
            yield ev

        yield StreamingEvent(
            kind="done",
            content={
                "run_id": state.run_id,
                "cost_usd": float(runtime.budget.spent_usd),
            },
            metadata={},
        )

    # ------------------------------------------------------------------
    # Phase helpers
    # ------------------------------------------------------------------

    def _select_best(self, root: _Node, by_id: dict[str, _Node]) -> _Node:
        """Return the highest-scoring explored node, or `root` if none qualifies.

        The root is deliberately excluded from the comparison: it is
        created with score 1.0 and inserted first, so a plain `max` over
        all nodes would always return it (generated scores are capped at
        1.0 and `max` keeps the first maximal element). Only nodes at or
        above `score_threshold` are eligible; ties go to the deeper node.
        """
        eligible = [
            node
            for node in by_id.values()
            if node.parent_id is not None and node.score >= self._score_threshold
        ]
        if not eligible:
            return root
        return max(eligible, key=lambda n: (n.score, n.depth))

    async def _synthesize(
        self,
        state: AgentState,
        root: _Node,
        by_id: dict[str, _Node],
        current_depth: int,
    ) -> None:
        """Issue the SYNTHESIZE call for the best explored path.

        Shared by :meth:`run` and :meth:`stream` so both pick the best
        node and render the path identically.
        """
        best = self._select_best(root, by_id)
        path = _path_to_root(best, by_id)
        path_text = "\n".join(f" depth={n.depth} score={n.score:.2f}: {n.content}" for n in path)
        await self._call_llm(
            state,
            iteration=current_depth + 1,
            system=SYNTHESIZE_SYSTEM_PROMPT,
            messages=[
                Message(role="user", content=state.task),
                Message(
                    role="assistant",
                    content=f"Best path explored:\n{path_text}",
                ),
            ],
            kind="synthesize",
        )

    @staticmethod
    def _unique_node_id(proposed: str, by_id: dict[str, _Node]) -> str:
        """Return `proposed` if unused in `by_id`, else a suffixed unique id.

        Thought ids come from the LLM, which may repeat them across
        parents or levels (e.g. "1", "2", "3" every time). Reusing such
        an id as a tree key would overwrite an existing node and could
        make a node its own ancestor.
        """
        candidate = proposed
        while candidate in by_id:
            candidate = f"{proposed}#{uuid4().hex[:8]}"
        return candidate

    async def _iterate_depth(
        self,
        state: AgentState,
        by_id: dict[str, _Node],
        survivors: list[_Node],
        current_depth: int,
    ) -> list[_Node]:
        """Run one depth-level of generate/score/prune.

        Returns the survivors of this level after threshold filtering
        and (optional) beam-width truncation, sorted by descending
        score. Records one ``branch`` step per generated child and adds
        every child (kept or pruned) to `by_id`.
        """
        new_survivors: list[_Node] = []
        for parent in survivors:
            self._check_guardrails(state)

            # GENERATE candidates for this parent
            candidates = await self._generate(state, parent, current_depth)
            if not candidates:
                continue

            # SCORE candidates
            scored = await self._score(state, candidates, current_depth)

            # Build child nodes + record branch step
            children: list[_Node] = []
            for thought, score in scored:
                child = _Node(
                    id=self._unique_node_id(thought.id, by_id),
                    parent_id=parent.id,
                    depth=current_depth + 1,
                    content=thought.content,
                    score=score,
                )
                parent.children.append(child)
                by_id[child.id] = child
                children.append(child)
                self._record_step(
                    state,
                    iteration=current_depth + 1,
                    kind="branch",
                    content={
                        "branch_id": child.id,
                        "parent_id": parent.id,
                        "score": score,
                        "thought": thought.content,
                    },
                )

            # PRUNE: at or above threshold; optionally top-K (beam_width).
            # Filter the freshly built nodes directly rather than looking
            # them up again by the LLM-supplied id.
            kept = [child for child in children if child.score >= self._score_threshold]
            kept.sort(key=lambda n: n.score, reverse=True)
            if self._beam_width is not None:
                kept = kept[: self._beam_width]
            new_survivors.extend(kept)

        # Across all parents at this level, also bound the global
        # beam if set.
        new_survivors.sort(key=lambda n: n.score, reverse=True)
        if self._beam_width is not None:
            new_survivors = new_survivors[: self._beam_width]
        return new_survivors

    async def _generate(
        self, state: AgentState, parent: _Node, current_depth: int
    ) -> list[_Thought]:
        """Generate `branch_factor` candidate thoughts as children of `parent`.

        Returns an empty list (after logging a warning) when the LLM
        response cannot be parsed into the expected schema.
        """
        prompt = GENERATE_SYSTEM_PROMPT.format(branch_factor=self._branch_factor)
        messages: list[Message] = [
            Message(role="user", content=state.task),
        ]
        # The root's content is the task itself, already sent above, so
        # prior-thought context is only added for non-root parents.
        if parent.depth > 0:
            messages.append(
                Message(
                    role="assistant",
                    content=(
                        f"Building on prior thought (score {parent.score:.2f}): {parent.content}"
                    ),
                )
            )
        response = await self._call_llm(
            state,
            iteration=current_depth + 1,
            system=prompt,
            messages=messages,
            kind="think",
        )
        try:
            thought_list = _ThoughtList.model_validate_json(_strip_code_fences(response.content))
        except (ValidationError, json.JSONDecodeError, ValueError) as exc:
            log.warning(
                "TreeOfThoughts: candidate generation parse failed at depth %d: %s",
                current_depth,
                exc,
            )
            return []
        return list(thought_list.thoughts)

    async def _score(
        self,
        state: AgentState,
        candidates: list[_Thought],
        current_depth: int,
    ) -> list[tuple[_Thought, float]]:
        """Score each candidate; returns list of (thought, score).

        The result preserves the order of `candidates`. If the response
        cannot be parsed, every candidate receives the neutral score 0.5.
        """
        # scorer="judge" deferred to feat-006; falls back to "self" for now.
        candidate_text = "\n".join(f"- {t.id}: {t.content}" for t in candidates)
        messages: list[Message] = [
            Message(role="user", content=state.task),
            Message(role="assistant", content=f"Candidate thoughts:\n{candidate_text}"),
        ]
        response = await self._call_llm(
            state,
            iteration=current_depth + 1,
            system=SCORE_SYSTEM_PROMPT,
            messages=messages,
            kind="think",
        )
        try:
            score_list = _BranchScoreList.model_validate_json(_strip_code_fences(response.content))
        except (ValidationError, json.JSONDecodeError, ValueError) as exc:
            log.warning(
                "TreeOfThoughts: scoring parse failed at depth %d: %s. "
                "Defaulting all candidates to neutral score 0.5.",
                current_depth,
                exc,
            )
            return [(t, 0.5) for t in candidates]

        score_by_id = {s.branch_id: s.score for s in score_list.scores}
        # Any candidate the LLM didn't score defaults to 0.0, so it is
        # pruned unless the threshold itself is 0.
        return [(t, score_by_id.get(t.id, 0.0)) for t in candidates]

    def _can_afford_next_level(self, runtime: RuntimeContext, n_survivors: int) -> bool:
        """Estimate next-level cost; return True if it fits in remaining budget.

        Estimate: each survivor needs (1 generate call + 1 score call).
        The per-call cost is taken as `spent_usd / iteration` so far; if
        `iteration` is still zero (nothing to average over), a small
        nonzero cost of 0.001 is assumed instead.
        """
        budget = runtime.budget
        avg = budget.spent_usd / max(1, budget.iteration) if budget.iteration else 0.001
        estimated = n_survivors * 2 * avg
        return bool(estimated <= budget.remaining_usd())
456
+
457
+
458
def _strip_code_fences(content: str) -> str:
    """Return `content` without a surrounding Markdown code fence.

    Leading and trailing whitespace is always trimmed. When the trimmed
    text opens with a triple-backtick fence, the whole first line
    (fence plus any language tag) is dropped, along with a closing
    fence if one ends the text. Text whose opening fence has no newline
    after it is returned trimmed but otherwise untouched.
    """
    fence = "```"
    trimmed = content.strip()
    if not trimmed.startswith(fence):
        return trimmed
    # Split off the opening fence line; an empty separator means the
    # text is a single line and there is no body to extract.
    _opening_line, separator, remainder = trimmed.partition("\n")
    if not separator:
        return trimmed
    return remainder.removesuffix(fence).strip()
470
+
471
+
472
+ __all__ = ["TreeOfThoughts"]
@@ -0,0 +1,12 @@
1
+ See AGENTS.md for project conventions. AGENTS.md is the canonical
2
+ source. This file exists so Cursor's native .cursorrules discovery
3
+ keeps working.
4
+
5
+ Cursor-specific rules can be added below this marker:
6
+
7
+ <!-- agentforge:end-managed -->
8
+
9
+ <!-- agentforge:custom -->
10
+ <!-- Cursor-specific rules go here. This section survives
11
+ `agentforge upgrade`. -->
12
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,13 @@
1
+ See [AGENTS.md](../AGENTS.md) for project conventions. AGENTS.md
2
+ is the canonical source for AI-assistant instructions. This file
3
+ exists so GitHub Copilot's native
4
+ `.github/copilot-instructions.md` discovery keeps working.
5
+
6
+ Copilot-specific instructions can be added below this marker:
7
+
8
+ <!-- agentforge:end-managed -->
9
+
10
+ <!-- agentforge:custom -->
11
+ <!-- Copilot-specific instructions go here. This section survives
12
+ `agentforge upgrade`. -->
13
+ <!-- agentforge:end-custom -->
File without changes
@@ -0,0 +1,123 @@
1
+ # AgentForge agent — AI assistant instructions
2
+
3
+ This project is built on **AgentForge {{ framework_version }}**. Use
4
+ these rules when suggesting changes. The file is read by Claude
5
+ Code, Cursor, Aider, and any tool following the
6
+ [AGENTS.md convention](https://agents.md).
7
+
8
+ ## Project shape (you must respect this)
9
+
10
+ - Framework version: `{{ framework_version }}`
11
+ - Template: `{{ template_name }}`
12
+ - Project slug: `{{ project_slug }}`
13
+ - LLM provider: `{{ llm_provider }}`
14
+
15
+ ## File ownership rules
16
+
17
+ - Files starting with `AGENTFORGE-MANAGED:` are owned by the
18
+ framework. Do not edit. Suggest changes to YAML config or
19
+ developer-owned files instead.
20
+ - Files starting with `AGENTFORGE-FORKED:` were customised by the
21
+ developer. Edit normally; do not restore the marker.
22
+ - Files with no marker are developer-owned. Edit normally.
23
+ - Markdown documents may carry a `<!-- agentforge:end-managed -->`
24
+ marker. Everything above belongs to the framework; everything
25
+ below is the developer's custom section and survives upgrades.
26
+
27
+ ## Architecture invariants
28
+
29
+ - **Tools** — use the `@tool` decorator on a typed function, OR
30
+ subclass `Tool`. Type hints drive the input schema; do not
31
+ hand-write JSON schemas.
32
+ - **Reasoning loop** — do not edit. Configure via
33
+ `agentforge.yaml > agent.strategy`.
34
+ - **LLM clients** — do not import vendor SDKs directly. Use
35
+ `agent.providers["..."]` or pass `model="<provider>:<model_id>"`.
36
+ - **Memory** — do not write SQL directly. Use `agent.memory.put`
37
+ / `.get` / `.query`.
38
+ - **Costs** — do not bypass `BudgetPolicy`. Every LLM call is
39
+ checked. Bypassing it is a P3 (cost-safety) violation.
40
+ - **Run id** — do not invent your own correlation id. Use
41
+ `current_run().run_id` from `agentforge_core.production.run_context`.
42
+ - **Guardrails** — input / output / tool-gate validators live in
43
+ `agentforge.guardrails`. Custom validators implement the locked
44
+ ABCs in `agentforge_core.contracts.guardrails`.
45
+
46
+ ## How to add common things
47
+
48
+ Open the runbook for the full step-by-step. Each runbook lives at
49
+ `docs/runbooks/NN-<topic>.md` and is upgrade-safe (the framework
50
+ maintains the managed section).
51
+
52
+ | Task | Runbook |
53
+ |---|---|
54
+ | Set up a fresh agent | `docs/runbooks/01-set-up-new-agent.md` |
55
+ | Add a tool | `docs/runbooks/02-add-a-tool.md` |
56
+ | Add a pipeline task | `docs/runbooks/03-add-a-pipeline-task.md` |
57
+ | Pick a reasoning strategy | `docs/runbooks/04-pick-reasoning-strategy.md` |
58
+ | Write prompts | `docs/runbooks/05-write-prompts.md` |
59
+ | Test your agent | `docs/runbooks/06-test-your-agent.md` |
60
+ | Debug a run | `docs/runbooks/07-debug-a-run.md` |
61
+ | Add memory / persistence | `docs/runbooks/08-add-memory.md` |
62
+ | Add MCP servers | `docs/runbooks/09-add-mcp.md` |
63
+ | Add evaluators | `docs/runbooks/10-add-evaluators.md` |
64
+ | Add safety guardrails | `docs/runbooks/11-add-safety-guardrails.md` |
65
+ | Add observability | `docs/runbooks/12-add-observability.md` |
66
+ | Configure multi-provider | `docs/runbooks/13-configure-multi-provider.md` |
67
+ | Deploy your agent | `docs/runbooks/14-deploy-your-agent.md` |
68
+ | Upgrade your agent | `docs/runbooks/15-upgrade-your-agent.md` |
69
+ | Configuration reference | `docs/runbooks/16-configuration-reference.md` |
70
+ | Add a reranker | `docs/runbooks/17-add-reranker.md` |
71
+ | Add hybrid search (BM25 + vector) | `docs/runbooks/18-add-hybrid-search.md` |
72
+ | Add GraphRAG (graph-augmented retrieval) | `docs/runbooks/19-add-graphrag.md` |
73
+ | Apply schema migrations | `docs/runbooks/20-apply-schema-migrations.md` |
74
+ | Use streaming guardrails (sentence-window) | `docs/runbooks/21-use-streaming-guardrails.md` |
75
+
76
+ ## Anti-patterns (do not suggest these)
77
+
78
+ - **LangChain idioms** (`LCEL`, `Runnable`, `RunnablePassthrough`)
79
+ — wrong framework. Use AgentForge's `Agent` + `@tool` + strategy
80
+ composition instead.
81
+ - **Hand-rolling JSON schemas for tools** — use type hints on a
82
+ `@tool`-decorated function and the schema is derived.
83
+ - **Storing API keys in `agentforge.yaml` literals** — use
84
+ `${ENV_VAR}` interpolation; secrets live in `.env` (gitignored).
85
+ - **Catching exceptions inside tool code to "make the agent more
86
+ robust"** — let them surface; the framework records them as
87
+ observations and the LLM recovers naturally.
88
+ - **Adding a wrapper around `Agent.run()` to add logging** — the
89
+ framework already logs; install a custom hook (runbook 12).
90
+ - **Writing SQL directly against the memory backend** — use
91
+ `agent.memory.put / .get / .query`; SQL bypasses the contract
92
+ and breaks driver swaps.
93
+ - **Running expensive code outside `BudgetPolicy`** — every
94
+ external call (LLM, tool, retriever) should respect the run's
95
+ budget.
96
+
97
+ ## Pre-commit checks (run these before suggesting a commit)
98
+
99
+ ```bash
100
+ agentforge config validate
101
+ agentforge status # no managed file should be drifted
102
+ pytest -q
103
+ ruff check
104
+ ```
105
+
106
+ ## Where to find the framework's truth
107
+
108
+ - Locked ABCs: `agentforge_core/contracts/*.py`
109
+ - Default config schema: `agentforge_core/config/schema.py`
110
+ - Built-in evaluators / guardrails / tools: `agentforge/*/`
111
+ - Spec for any feature: `docs/features/feat-NNN-*.md` in the
112
+ framework repo
113
+
114
+ When in doubt, prefer the framework's locked types over inventing
115
+ new shapes. Suggesting an `Agent` subclass or a new ABC is almost
116
+ always wrong — the framework is composed from named modules.
117
+
118
+ <!-- agentforge:end-managed -->
119
+
120
+ <!-- agentforge:custom -->
121
+ <!-- Project-specific AI assistant instructions go here. This
122
+ section survives `agentforge upgrade`. -->
123
+ <!-- agentforge:end-custom -->
@@ -0,0 +1,13 @@
1
+ > See [AGENTS.md](./AGENTS.md) for AgentForge conventions. Claude
2
+ > Code reads both files; **AGENTS.md is the canonical source**.
3
+
4
+ This file exists so Claude Code's native `CLAUDE.md` discovery
5
+ keeps working. Put Claude-Code-specific instructions in the custom
6
+ section below.
7
+
8
+ <!-- agentforge:end-managed -->
9
+
10
+ <!-- agentforge:custom -->
11
+ <!-- Claude Code-specific instructions go here. This section
12
+ survives `agentforge upgrade`. -->
13
+ <!-- agentforge:end-custom -->