dialectica 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dialectica/.env.example +36 -0
- dialectica/__init__.py +74 -0
- dialectica/agent.py +149 -0
- dialectica/agent_factory.py +139 -0
- dialectica/agent_runtime.py +28 -0
- dialectica/coordinator.py +212 -0
- dialectica/gan_evaluator.py +199 -0
- dialectica/generation.py +109 -0
- dialectica/llm_config.py +87 -0
- dialectica/models.py +73 -0
- dialectica/protocols.py +60 -0
- dialectica/selection.py +20 -0
- dialectica/synthesis.py +58 -0
- dialectica/validation.py +42 -0
- dialectica-0.3.0.dist-info/METADATA +475 -0
- dialectica-0.3.0.dist-info/RECORD +18 -0
- dialectica-0.3.0.dist-info/WHEEL +4 -0
- dialectica-0.3.0.dist-info/licenses/LICENSE +21 -0
dialectica/.env.example
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Example Environment Variables
|
|
2
|
+
# Copy this file to .env and fill in your actual credentials and configurations.
|
|
3
|
+
|
|
4
|
+
# --- Google AI Studio / Vertex AI ---
|
|
5
|
+
# Set GOOGLE_GENAI_USE_VERTEXAI to False if using Google AI Studio API Key
|
|
6
|
+
GOOGLE_GENAI_USE_VERTEXAI=False
|
|
7
|
+
# Provide your Google AI Studio API Key if GOOGLE_GENAI_USE_VERTEXAI is False
|
|
8
|
+
# GOOGLE_API_KEY=AIzaSy...
|
|
9
|
+
|
|
10
|
+
# OR
|
|
11
|
+
|
|
12
|
+
# Set GOOGLE_GENAI_USE_VERTEXAI to True if using Vertex AI on Google Cloud
|
|
13
|
+
# GOOGLE_GENAI_USE_VERTEXAI=True
|
|
14
|
+
# Provide your Google Cloud Project ID and Location if using Vertex AI
|
|
15
|
+
# GOOGLE_CLOUD_PROJECT="your-project-id"
|
|
16
|
+
# GOOGLE_CLOUD_LOCATION="your-location" #e.g. us-central1
|
|
17
|
+
|
|
18
|
+
# --- OpenAI / Compatible ---
|
|
19
|
+
# Provide your OpenAI API Key (or compatible service key)
|
|
20
|
+
# OPENAI_API_KEY=sk-...
|
|
21
|
+
# Provide the Base URL for the API endpoint (if not using standard OpenAI)
|
|
22
|
+
# OPENAI_API_BASE=https://api.example.com/v1
|
|
23
|
+
|
|
24
|
+
# --- OpenRouter ---
|
|
25
|
+
# Provide your OpenRouter API Key
|
|
26
|
+
# OPENROUTER_API_KEY=sk-or-v1-...
|
|
27
|
+
|
|
28
|
+
# --- Model Configuration (ADK 2.0+) ---
|
|
29
|
+
# Default model for all dynamically created agents.
|
|
30
|
+
# Format: provider:model_name (e.g., google:gemini-3.5-flash, openai:gpt-4o, openrouter:google/gemini-3.1-pro)
|
|
31
|
+
DEFAULT_MODEL_CONFIG=google:gemini-3.5-flash
|
|
32
|
+
|
|
33
|
+
# Optional: Role-specific overrides (these override DEFAULT_MODEL_CONFIG for specific roles)
|
|
34
|
+
# GENERATOR_MODEL_CONFIG=google:gemini-3.1-pro
|
|
35
|
+
# DISCRIMINATOR_MODEL_CONFIG=google:gemini-3.1-pro
|
|
36
|
+
# SYNTHESIZER_MODEL_CONFIG=google:gemini-3.1-pro
|
dialectica/__init__.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dialectica — a pluggable adversarial reasoning engine.
|
|
3
|
+
|
|
4
|
+
The Engine runs a beam-style tree search and delegates each stage to a
|
|
5
|
+
swappable component (Generator / Evaluator / Selector / Synthesizer):
|
|
6
|
+
thesis -> antithesis -> synthesis. The defaults give a Tree-of-Thoughts +
|
|
7
|
+
GAN-adversarial pipeline, but any stage can be replaced without touching the
|
|
8
|
+
engine.
|
|
9
|
+
|
|
10
|
+
Main Components:
|
|
11
|
+
- Engine: Runs the search control flow over pluggable stages
|
|
12
|
+
- Generator / Evaluator / Selector / Synthesizer: the stage protocols
|
|
13
|
+
- LlmGenerator, AdversarialEvaluator/SinglePassEvaluator, BeamSearch/GreedySearch,
|
|
14
|
+
LlmSynthesizer: the default implementations
|
|
15
|
+
- ThoughtData / EvaluationResult: data models
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
from dialectica import create_engine
|
|
19
|
+
|
|
20
|
+
engine = create_engine("Your problem here")
|
|
21
|
+
result = await engine.run()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
Configuration is read from ``os.environ`` — as a library, Dialectica does NOT
|
|
25
|
+
load ``.env`` itself; the consuming application owns environment setup.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from .agent import (
|
|
29
|
+
Engine,
|
|
30
|
+
build_default_components,
|
|
31
|
+
create_coordinator,
|
|
32
|
+
create_engine,
|
|
33
|
+
run_tot_workflow,
|
|
34
|
+
)
|
|
35
|
+
from .agent_factory import ROLE_TEMPLATES, create_agent
|
|
36
|
+
from .coordinator import Coordinator
|
|
37
|
+
from .gan_evaluator import AdversarialEvaluator, SinglePassEvaluator
|
|
38
|
+
from .generation import LlmGenerator
|
|
39
|
+
from .models import DiscriminatorVerdict, EvaluationResult, ThoughtData
|
|
40
|
+
from .protocols import Evaluator, Generator, Selector, Synthesizer
|
|
41
|
+
from .selection import BeamSearch, GreedySearch
|
|
42
|
+
from .synthesis import LlmSynthesizer
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
# Main entry points
|
|
46
|
+
"create_engine",
|
|
47
|
+
"Engine",
|
|
48
|
+
"build_default_components",
|
|
49
|
+
"run_tot_workflow",
|
|
50
|
+
# Backward-compatible aliases
|
|
51
|
+
"create_coordinator",
|
|
52
|
+
"Coordinator",
|
|
53
|
+
# Stage protocols (the pluggable interfaces)
|
|
54
|
+
"Generator",
|
|
55
|
+
"Evaluator",
|
|
56
|
+
"Selector",
|
|
57
|
+
"Synthesizer",
|
|
58
|
+
# Default stage implementations
|
|
59
|
+
"LlmGenerator",
|
|
60
|
+
"AdversarialEvaluator",
|
|
61
|
+
"SinglePassEvaluator",
|
|
62
|
+
"BeamSearch",
|
|
63
|
+
"GreedySearch",
|
|
64
|
+
"LlmSynthesizer",
|
|
65
|
+
# Data models
|
|
66
|
+
"ThoughtData",
|
|
67
|
+
"EvaluationResult",
|
|
68
|
+
"DiscriminatorVerdict",
|
|
69
|
+
# Agent creation
|
|
70
|
+
"create_agent",
|
|
71
|
+
"ROLE_TEMPLATES",
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
__version__ = "0.3.0"
|
dialectica/agent.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tree of Thoughts with GAN-style Adversarial Evaluation — composition root.
|
|
3
|
+
|
|
4
|
+
``create_coordinator`` wires the default pluggable components (LLM generator,
|
|
5
|
+
GAN evaluator, beam-search selector, LLM synthesizer) into a ``Coordinator``.
|
|
6
|
+
To customize, build the components yourself and construct ``Coordinator``
|
|
7
|
+
directly — see ``build_default_components``.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
from dialectica.agent import create_coordinator
|
|
11
|
+
|
|
12
|
+
coordinator = create_coordinator("Your problem statement here")
|
|
13
|
+
result = await coordinator.run()
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from .agent_factory import create_agent
|
|
20
|
+
from .coordinator import Coordinator
|
|
21
|
+
from .gan_evaluator import AdversarialEvaluator
|
|
22
|
+
from .generation import LlmGenerator
|
|
23
|
+
from .llm_config import get_model_config
|
|
24
|
+
from .models import DiscriminatorVerdict
|
|
25
|
+
from .protocols import Evaluator, Generator, Selector, Synthesizer
|
|
26
|
+
from .selection import BeamSearch
|
|
27
|
+
from .synthesis import LlmSynthesizer
|
|
28
|
+
|
|
29
|
+
# Configure logging.
# NOTE(review): logging.basicConfig() runs at import time and configures the
# root logger (it does nothing if the root logger already has handlers).
# Libraries normally leave handler/level setup to the consuming application;
# confirm that this import-time side effect is intended.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def build_default_components(
    beam_width: int = 3,
    max_gan_rounds: int = 3,
    score_threshold: float = 7.0,
    synthesizer_model: Optional[str] = None,
) -> tuple[Generator, Evaluator, Selector, Synthesizer]:
    """Assemble the default stage components for the engine.

    A single generator agent is created and handed to both the
    ``LlmGenerator`` and the ``AdversarialEvaluator``, so generation and GAN
    refinement use the same agent instance.

    Args:
        beam_width: Width passed to the ``BeamSearch`` selector.
        max_gan_rounds: ``max_rounds`` passed to the adversarial evaluator.
        score_threshold: ``score_threshold`` passed to the adversarial evaluator.
        synthesizer_model: Model config for the synthesizer agent; when falsy
            the ``"SYNTHESIZER"`` model config is looked up instead.

    Returns:
        The ``(generator, evaluator, selector, synthesizer)`` tuple.
    """
    # Agents are created in this fixed order: generator, discriminator, synthesizer.
    gen_agent = create_agent(
        role="Generator",
        role_name="Generator",
        model_config=get_model_config("GENERATOR"),
    )
    disc_agent = create_agent(
        role="Discriminator",
        role_name="Discriminator",
        model_config=get_model_config("DISCRIMINATOR"),
        output_schema=DiscriminatorVerdict,
    )
    # Only fall back to the role-based config when no explicit model was given.
    synth_model = synthesizer_model if synthesizer_model else get_model_config("SYNTHESIZER")
    synth_agent = create_agent(
        role="Synthesizer",
        role_name="Synthesizer",
        model_config=synth_model,
    )

    # Tuple elements are evaluated left to right, so the stage objects are
    # constructed in generator -> evaluator -> selector -> synthesizer order.
    return (
        LlmGenerator(gen_agent),
        AdversarialEvaluator(
            generator=gen_agent,
            discriminator=disc_agent,
            max_rounds=max_gan_rounds,
            score_threshold=score_threshold,
        ),
        BeamSearch(width=beam_width),
        LlmSynthesizer(synth_agent),
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def create_coordinator(
    problem: str,
    max_depth: int = 4,
    beam_width: int = 3,
    max_gan_rounds: int = 3,
    score_threshold: float = 7.0,
    synthesizer_model: Optional[str] = None,
) -> Coordinator:
    """Build a ``Coordinator`` that uses the default ToT + GAN components.

    Args:
        problem: The problem statement to solve.
        max_depth: Maximum depth of the thought tree (default: 4).
        beam_width: Number of top candidates the beam keeps (default: 3).
        max_gan_rounds: Maximum adversarial refinement rounds (default: 3).
        score_threshold: Minimum score for a thought to continue (default: 7.0).
            The same value is given to both the evaluator and the coordinator.
        synthesizer_model: Optional specific model for synthesis.

    Returns:
        A ``Coordinator`` wired with the components from
        ``build_default_components``.

    Example:
        >>> coordinator = create_coordinator("Design a sustainable urban transport system")
        >>> result = await coordinator.run()
        >>> print(result["final_answer"])
    """
    logger.info(f"Creating coordinator for problem: {problem[:50]}...")

    default_generator, default_evaluator, default_selector, default_synthesizer = (
        build_default_components(
            beam_width=beam_width,
            max_gan_rounds=max_gan_rounds,
            score_threshold=score_threshold,
            synthesizer_model=synthesizer_model,
        )
    )

    coordinator = Coordinator(
        problem=problem,
        generator=default_generator,
        evaluator=default_evaluator,
        selector=default_selector,
        synthesizer=default_synthesizer,
        max_depth=max_depth,
        score_threshold=score_threshold,
    )
    return coordinator
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Convenience wrapper for embedding the engine in other async code.
|
|
123
|
+
async def run_tot_workflow(problem: str, **kwargs):
    """Create a default coordinator for ``problem`` and run it to completion.

    A thin async helper for callers that just want one awaitable function.
    Any keyword arguments are forwarded unchanged to ``create_coordinator``.

    Returns:
        Whatever ``Coordinator.run()`` returns: a dictionary with
        final_answer, thought_tree, best_path, and stats.
    """
    return await create_coordinator(problem, **kwargs).run()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Canonical Dialectica names. The Coordinator/create_coordinator names are kept
|
|
136
|
+
# as aliases for backward compatibility.
|
|
137
|
+
create_engine = create_coordinator
|
|
138
|
+
Engine = Coordinator
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
__all__ = [
|
|
142
|
+
"create_engine",
|
|
143
|
+
"Engine",
|
|
144
|
+
"build_default_components",
|
|
145
|
+
"run_tot_workflow",
|
|
146
|
+
# Backward-compatible aliases
|
|
147
|
+
"create_coordinator",
|
|
148
|
+
"Coordinator",
|
|
149
|
+
]
|
dialectica/agent_factory.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Dynamic agent factory for creating specialist agents at runtime.
|
|
2
|
+
|
|
3
|
+
Creates LlmAgent instances from role templates (Generator, Discriminator,
|
|
4
|
+
Synthesizer, ...) with per-role prompts, tools, and model configuration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from google.adk.agents import LlmAgent
|
|
11
|
+
from google.adk.tools import google_search
|
|
12
|
+
|
|
13
|
+
from .llm_config import get_model_config
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Agent role templates - used to generate instructions for common roles.
#
# Each entry maps a role name to:
#   "system_prompt": a str.format template; create_agent fills in the
#                    {role_name} and {additional_context} placeholders.
#   "tools":         the default tool list used when the caller passes none.
ROLE_TEMPLATES = {
    "Generator": {
        "system_prompt": """You are a {role_name} responsible for generating high-quality thoughts.

Your task:
- Generate creative, diverse, and well-reasoned thought branches
- Each thought should be distinct and explore different angles
- Build on the parent context when provided
- Be specific and actionable, not vague or generic

{additional_context}

Generate thoughts that advance the problem-solving process.""",
        "tools": [],
    },
    "Discriminator": {
        "system_prompt": """You are a {role_name} responsible for critically evaluating thoughts.

Your task:
- Evaluate thoughts with rigorous skepticism
- Identify logical flaws, weak assumptions, and potential issues
- Provide specific, actionable feedback for improvement
- Assess feasibility and quality objectively
- Recommend termination only if the path is fundamentally flawed

{additional_context}

Your evaluation will drive iterative refinement, so be thorough and constructive.""",
        "tools": [],
    },
    # The only built-in role that ships with a tool (google_search).
    "ResearchGenerator": {
        "system_prompt": """You are a {role_name} that combines research with thought generation.

Your task:
- Use the google_search tool to gather relevant information
- Generate thoughts grounded in real-world facts and data
- Synthesize research findings into actionable ideas
- Cite sources when making claims

{additional_context}

Generate informed thoughts that leverage external knowledge.""",
        "tools": [google_search],
    },
    "Synthesizer": {
        "system_prompt": """You are a {role_name} responsible for integrating insights into a final answer.

Your task:
- Analyze the best-performing thought branches
- Identify common themes and complementary insights
- Synthesize a coherent, comprehensive solution
- Resolve any conflicts between different approaches
- Present the final answer clearly and completely

{additional_context}

Create a unified solution from the strongest reasoning paths.""",
        "tools": [],
    },
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def create_agent(
    role: str,
    role_name: str | None = None,
    additional_context: str = "",
    tools: list[Any] | None = None,
    model_config: str | None = None,
    output_schema: type | None = None,
) -> LlmAgent:
    """Create a specialist agent with a specific role.

    Args:
        role: The agent role (Generator, Discriminator, ResearchGenerator,
            Synthesizer). An unknown role logs a warning and falls back to
            the Generator template.
        role_name: Optional custom name for the role (defaults to role). It is
            used both in the prompt and as the agent's ``name``.
        additional_context: Extra context to inject into the system prompt.
        tools: Optional list of tools to give the agent; ``None`` selects the
            template's default tools. The list is copied, never stored by
            reference.
        model_config: Optional model config string (defaults to role-based config).
        output_schema: Optional Pydantic model forcing structured JSON output.
            ADK disallows combining ``output_schema`` with tools, so tools are
            dropped (with a warning) when a schema is supplied.

    Returns:
        LlmAgent configured for the specified role.
    """
    if role not in ROLE_TEMPLATES:
        logger.warning("Unknown role '%s', using Generator template", role)
        role = "Generator"

    template = ROLE_TEMPLATES[role]
    effective_role_name = role_name or role

    # Build the system prompt
    system_prompt = template["system_prompt"].format(
        role_name=effective_role_name,
        additional_context=additional_context,
    )

    # output_schema and tools are mutually exclusive in ADK.
    structured = output_schema is not None
    requested_tools = tools if tools is not None else template["tools"]
    if structured:
        if requested_tools:
            # Make the drop visible instead of silently ignoring the tools.
            logger.warning(
                "Agent '%s': dropping %d tool(s) because output_schema is set",
                effective_role_name,
                len(requested_tools),
            )
        effective_tools: list[Any] = []
    else:
        # Copy so the agent never shares the template's (module-level) or the
        # caller's list object; a mutation on one agent must not leak to others.
        effective_tools = list(requested_tools)

    # Get model config (use role-specific override if available)
    effective_model = model_config if model_config else get_model_config(role)

    agent = LlmAgent(
        name=effective_role_name,
        instruction=system_prompt,
        model=effective_model,
        tools=effective_tools,
        output_schema=output_schema,
    )

    logger.info(
        "Created agent '%s' (role=%s, tools=%d, structured=%s)",
        effective_role_name,
        role,
        len(effective_tools),
        structured,
    )

    return agent
|
dialectica/agent_runtime.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Single entry point for invoking an LlmAgent.
|
|
2
|
+
|
|
3
|
+
Centralizing the ADK Runner call gives every pluggable component (generator,
|
|
4
|
+
evaluator, synthesizer) one shared seam — which is also the one place tests
|
|
5
|
+
patch to run the engine without the network.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
from google.adk.agents import LlmAgent
|
|
11
|
+
from google.adk.runners import InMemoryRunner
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def run_agent(agent: LlmAgent, instruction: str) -> str:
    """Invoke ``agent`` with ``instruction`` and return the joined text reply.

    A fresh ``InMemoryRunner`` is created for every call. The text of every
    event part is concatenated in order, skipping parts that have no text or
    are flagged as ``thought``; the result is stripped of surrounding
    whitespace.
    """
    runner = InMemoryRunner(agent=agent, app_name="dialectica")
    events = await runner.run_debug(instruction, quiet=True)

    chunks = []
    for event in events:
        # Events without content (or with an empty parts list) carry no text.
        if not (event.content and event.content.parts):
            continue
        for part in event.content.parts:
            if part.text and not part.thought:
                chunks.append(part.text)

    return "".join(chunks).strip()
|
dialectica/coordinator.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Tree-of-Thoughts search engine.
|
|
2
|
+
|
|
3
|
+
The Coordinator owns the search *control flow* (build root -> expand frontier ->
|
|
4
|
+
score -> select -> synthesize) but delegates every decision to injected,
|
|
5
|
+
swappable components: a ``Generator``, an ``Evaluator``, a ``Selector`` and a
|
|
6
|
+
``Synthesizer``. Swap any of them to retarget the engine without touching this
|
|
7
|
+
file — that is what makes it general-purpose rather than a single hardcoded
|
|
8
|
+
ToT+GAN pipeline.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from .models import ThoughtData, score_of
|
|
16
|
+
from .protocols import Evaluator, Generator, Selector, Synthesizer
|
|
17
|
+
from .validation import validate_thought_node
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Coordinator:
    """Runs a beam-style tree search using pluggable stage components.

    Phases:
    1. Initialize - create the root, expand it into strategies, score them.
    2. Explore - iteratively select the frontier, expand, score, re-select.
    3. Synthesize - combine the evaluated thoughts into a final answer.

    Search state (``thought_tree`` and ``active_beam``) is rebuilt from scratch
    at the start of every ``run()``, so an instance can be run more than once.
    """

    def __init__(
        self,
        problem: str,
        generator: Generator,
        evaluator: Evaluator,
        selector: Selector,
        synthesizer: Synthesizer,
        max_depth: int = 4,
        score_threshold: float = 7.0,
    ):
        """Store the problem, the four stage components and the search limits.

        Args:
            problem: The problem statement; it becomes the root node's content.
            generator: Expands a node into child thought texts.
            evaluator: Scores (and may refine) a thought.
            selector: Chooses which nodes of a candidate list to keep.
            synthesizer: Turns the evaluated tree into the final answer.
            max_depth: Upper bound on explore iterations and on the depth of
                nodes that are still expanded.
            score_threshold: Minimum score a node needs to enter the beam.
        """
        self.problem = problem
        self.generator = generator
        self.evaluator = evaluator
        self.selector = selector
        self.synthesizer = synthesizer
        self.max_depth = max_depth
        self.score_threshold = score_threshold

        # State
        self.thought_tree: dict[str, ThoughtData] = {}
        self.active_beam: list[str] = []

        logger.info("Coordinator initialized for problem: %s...", problem[:50])

    async def run(self) -> dict[str, Any]:
        """Execute the full search and return the answer plus tree and stats.

        Returns:
            A dict with ``final_answer``, ``thought_tree`` (node id ->
            ``model_dump()`` of the node), ``best_path`` (list of node ids)
            and ``stats`` (``total_thoughts``, ``max_depth_reached``,
            ``duration_seconds``).

        Raises:
            ValueError: If the root node fails validation.
        """
        start_time = datetime.now()

        logger.info("Phase 1: Initializing thought tree")
        await self._initialize()

        logger.info("Phase 2: Exploring with beam search")
        await self._explore()

        logger.info("Phase 3: Synthesizing final answer")
        final_answer = await self.synthesizer.synthesize(
            self.problem, list(self.thought_tree.values())
        )

        duration = (datetime.now() - start_time).total_seconds()
        return {
            "final_answer": final_answer,
            "thought_tree": {k: v.model_dump() for k, v in self.thought_tree.items()},
            "best_path": self._get_best_path(),
            "stats": {
                "total_thoughts": len(self.thought_tree),
                # default=0 keeps the stats well-defined even for an empty tree.
                "max_depth_reached": max(
                    (t.depth for t in self.thought_tree.values()), default=0
                ),
                "duration_seconds": duration,
            },
        }

    async def _initialize(self):
        """Phase 1: create the root, expand into strategies, score each."""
        # Start from a clean slate so a repeated run() cannot mix nodes from a
        # previous run into synthesis, best-path selection or the stats.
        self.thought_tree = {}
        self.active_beam = []

        root = self._add_node("root", parent_id=None, content=self.problem, depth=0, status="active")

        logger.info("Generating initial strategies")
        strategies = await self.generator.expand(root, self.problem)

        strategy_ids = []
        for i, strategy in enumerate(strategies):
            node = self._add_node(f"root_s{i}", parent_id="root", content=strategy, depth=1)
            if node is not None:
                strategy_ids.append(node.thoughtId)

        # Score strategies before beam selection so the beam reflects merit,
        # not generation order. The beam is the strategies clearing the bar.
        for sid in strategy_ids:
            score = await self._evaluate_node(self.thought_tree[sid], self.problem)
            if score >= self.score_threshold:
                self.active_beam.append(sid)

        # Don't stall the whole run if nothing cleared the bar: seed exploration
        # with the best strategies the selector would keep.
        if not self.active_beam and strategy_ids:
            kept = self.selector.select([self.thought_tree[sid] for sid in strategy_ids])
            self.active_beam = [n.thoughtId for n in kept]
            logger.info("No strategy passed threshold; seeding beam with top %d", len(self.active_beam))

        logger.info(
            "Scored %d strategies; %d entered the beam", len(strategy_ids), len(self.active_beam)
        )

    async def _explore(self):
        """Phase 2: beam search — select, expand, score, repeat.

        Runs at most ``max_depth`` iterations and stops early once the beam is
        empty. Children are evaluated one at a time, in generation order.
        """
        iteration = 0
        while self.active_beam and iteration < self.max_depth:
            iteration += 1
            logger.info("Explore iteration %d, beam size: %d", iteration, len(self.active_beam))

            frontier = self.selector.select([self.thought_tree[nid] for nid in self.active_beam])

            new_beam: list[str] = []
            for parent in frontier:
                # Nodes already at the depth limit are never expanded.
                if parent.depth >= self.max_depth:
                    continue

                children = await self.generator.expand(parent, self.problem)
                for i, content in enumerate(children):
                    child = self._add_node(
                        f"{parent.thoughtId}_c{i}",
                        parent_id=parent.thoughtId,
                        content=content,
                        depth=parent.depth + 1,
                    )
                    if child is None:
                        continue
                    score = await self._evaluate_node(child, parent.thought)
                    if score >= self.score_threshold:
                        new_beam.append(child.thoughtId)

            self.active_beam = new_beam
            logger.info(
                "Iteration %d complete, new beam size: %d", iteration, len(self.active_beam)
            )

            if not self.active_beam:
                logger.info("No candidates meet threshold, stopping exploration")
                break

    async def _evaluate_node(self, node: ThoughtData, parent_thought: str) -> float:
        """Score ``node`` via the evaluator, persisting the refined thought.

        The evaluator scores the *refined* thought, so the node's text is
        updated to that refined version — otherwise synthesis would run on the
        original wording while reporting the improved score.

        Args:
            node: The node to evaluate; it is updated in place (thought, score,
                status, adversarial rounds and refinement history).
            parent_thought: Text of the parent, passed to the evaluator as
                context.

        Returns:
            The score reported by the evaluator.
        """
        result = await self.evaluator.evaluate(
            thought_content=node.thought,
            context={
                "problem": self.problem,
                "parent_thought": parent_thought,
                "depth": node.depth,
            },
        )
        # Keep the original wording when the evaluator returned no refinement.
        node.thought = result.refined_thought or node.thought
        node.evaluationScore = result.score
        node.status = "evaluated"
        node.adversarialRounds = result.adversarial_rounds
        node.refinementHistory = result.history
        return result.score

    def _add_node(
        self,
        node_id: str,
        parent_id: str | None,
        content: str,
        depth: int,
        status: str = "generated",
    ) -> ThoughtData | None:
        """Validate and insert a node; return it, or None if validation fails.

        Raises:
            ValueError: If the node being added is the root and it is invalid;
                any other invalid node is skipped with a warning.
        """
        node = validate_thought_node(
            thought_id=node_id,
            parent_id=parent_id,
            content=content,
            depth=depth,
            status=status,
        )
        if node is None:
            if node_id == "root":
                raise ValueError(f"Root node validation failed for content: {content!r}")
            logger.warning("Skipping invalid node %s", node_id)
            return None
        self.thought_tree[node_id] = node
        return node

    def _get_best_path(self) -> list[str]:
        """Get the path from root to the highest-scoring evaluated node.

        Returns:
            Node ids ordered from the top of the tree down to the best node.
            ``[]`` for an empty tree; ``["root"]`` (if present) when no node
            has a score yet.
        """
        if not self.thought_tree:
            return []

        scored = [t for t in self.thought_tree.values() if t.evaluationScore is not None]
        if not scored:
            return ["root"] if "root" in self.thought_tree else []

        best = max(scored, key=score_of)
        path = []
        current_id: str | None = best.thoughtId
        # Walk the parent links upward until a node has no parent (or is missing).
        while current_id:
            path.append(current_id)
            current = self.thought_tree.get(current_id)
            current_id = current.parentId if current else None

        return list(reversed(path))
|