fast-agent-mcp 0.0.7 (fast_agent_mcp-0.0.7-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fast-agent-mcp might be problematic.
- fast_agent_mcp-0.0.7.dist-info/METADATA +322 -0
- fast_agent_mcp-0.0.7.dist-info/RECORD +100 -0
- fast_agent_mcp-0.0.7.dist-info/WHEEL +4 -0
- fast_agent_mcp-0.0.7.dist-info/entry_points.txt +5 -0
- fast_agent_mcp-0.0.7.dist-info/licenses/LICENSE +201 -0
- mcp_agent/__init__.py +0 -0
- mcp_agent/agents/__init__.py +0 -0
- mcp_agent/agents/agent.py +277 -0
- mcp_agent/app.py +303 -0
- mcp_agent/cli/__init__.py +0 -0
- mcp_agent/cli/__main__.py +4 -0
- mcp_agent/cli/commands/bootstrap.py +221 -0
- mcp_agent/cli/commands/config.py +11 -0
- mcp_agent/cli/commands/setup.py +229 -0
- mcp_agent/cli/main.py +68 -0
- mcp_agent/cli/terminal.py +24 -0
- mcp_agent/config.py +334 -0
- mcp_agent/console.py +28 -0
- mcp_agent/context.py +251 -0
- mcp_agent/context_dependent.py +48 -0
- mcp_agent/core/fastagent.py +1013 -0
- mcp_agent/eval/__init__.py +0 -0
- mcp_agent/event_progress.py +88 -0
- mcp_agent/executor/__init__.py +0 -0
- mcp_agent/executor/decorator_registry.py +120 -0
- mcp_agent/executor/executor.py +293 -0
- mcp_agent/executor/task_registry.py +34 -0
- mcp_agent/executor/temporal.py +405 -0
- mcp_agent/executor/workflow.py +197 -0
- mcp_agent/executor/workflow_signal.py +325 -0
- mcp_agent/human_input/__init__.py +0 -0
- mcp_agent/human_input/handler.py +49 -0
- mcp_agent/human_input/types.py +58 -0
- mcp_agent/logging/__init__.py +0 -0
- mcp_agent/logging/events.py +123 -0
- mcp_agent/logging/json_serializer.py +163 -0
- mcp_agent/logging/listeners.py +216 -0
- mcp_agent/logging/logger.py +365 -0
- mcp_agent/logging/rich_progress.py +120 -0
- mcp_agent/logging/tracing.py +140 -0
- mcp_agent/logging/transport.py +461 -0
- mcp_agent/mcp/__init__.py +0 -0
- mcp_agent/mcp/gen_client.py +85 -0
- mcp_agent/mcp/mcp_activity.py +18 -0
- mcp_agent/mcp/mcp_agent_client_session.py +242 -0
- mcp_agent/mcp/mcp_agent_server.py +56 -0
- mcp_agent/mcp/mcp_aggregator.py +394 -0
- mcp_agent/mcp/mcp_connection_manager.py +330 -0
- mcp_agent/mcp/stdio.py +104 -0
- mcp_agent/mcp_server_registry.py +275 -0
- mcp_agent/progress_display.py +10 -0
- mcp_agent/resources/examples/decorator/main.py +26 -0
- mcp_agent/resources/examples/decorator/optimizer.py +78 -0
- mcp_agent/resources/examples/decorator/orchestrator.py +68 -0
- mcp_agent/resources/examples/decorator/parallel.py +81 -0
- mcp_agent/resources/examples/decorator/router.py +56 -0
- mcp_agent/resources/examples/decorator/tiny.py +22 -0
- mcp_agent/resources/examples/mcp_researcher/main-evalopt.py +53 -0
- mcp_agent/resources/examples/mcp_researcher/main.py +38 -0
- mcp_agent/telemetry/__init__.py +0 -0
- mcp_agent/telemetry/usage_tracking.py +18 -0
- mcp_agent/workflows/__init__.py +0 -0
- mcp_agent/workflows/embedding/__init__.py +0 -0
- mcp_agent/workflows/embedding/embedding_base.py +61 -0
- mcp_agent/workflows/embedding/embedding_cohere.py +49 -0
- mcp_agent/workflows/embedding/embedding_openai.py +46 -0
- mcp_agent/workflows/evaluator_optimizer/__init__.py +0 -0
- mcp_agent/workflows/evaluator_optimizer/evaluator_optimizer.py +359 -0
- mcp_agent/workflows/intent_classifier/__init__.py +0 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_base.py +120 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_embedding.py +134 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_embedding_cohere.py +45 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_embedding_openai.py +45 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_llm.py +161 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_llm_anthropic.py +60 -0
- mcp_agent/workflows/intent_classifier/intent_classifier_llm_openai.py +60 -0
- mcp_agent/workflows/llm/__init__.py +0 -0
- mcp_agent/workflows/llm/augmented_llm.py +645 -0
- mcp_agent/workflows/llm/augmented_llm_anthropic.py +539 -0
- mcp_agent/workflows/llm/augmented_llm_openai.py +615 -0
- mcp_agent/workflows/llm/llm_selector.py +345 -0
- mcp_agent/workflows/llm/model_factory.py +175 -0
- mcp_agent/workflows/orchestrator/__init__.py +0 -0
- mcp_agent/workflows/orchestrator/orchestrator.py +407 -0
- mcp_agent/workflows/orchestrator/orchestrator_models.py +154 -0
- mcp_agent/workflows/orchestrator/orchestrator_prompts.py +113 -0
- mcp_agent/workflows/parallel/__init__.py +0 -0
- mcp_agent/workflows/parallel/fan_in.py +350 -0
- mcp_agent/workflows/parallel/fan_out.py +187 -0
- mcp_agent/workflows/parallel/parallel_llm.py +141 -0
- mcp_agent/workflows/router/__init__.py +0 -0
- mcp_agent/workflows/router/router_base.py +276 -0
- mcp_agent/workflows/router/router_embedding.py +240 -0
- mcp_agent/workflows/router/router_embedding_cohere.py +59 -0
- mcp_agent/workflows/router/router_embedding_openai.py +59 -0
- mcp_agent/workflows/router/router_llm.py +301 -0
- mcp_agent/workflows/swarm/__init__.py +0 -0
- mcp_agent/workflows/swarm/swarm.py +320 -0
- mcp_agent/workflows/swarm/swarm_anthropic.py +42 -0
- mcp_agent/workflows/swarm/swarm_openai.py +41 -0

mcp_agent/resources/examples/mcp_researcher/main.py
@@ -0,0 +1,38 @@
import asyncio

from mcp_agent.core.fastagent import FastAgent
# from rich import print

agents = FastAgent(name="Researcher")


@agents.agent(
    "Researcher",
    instruction="""
    You are a research assistant, with access to internet search (via Brave),
    website fetch, a python interpreter (you can install packages with uv) and a filesystem.
    Use the current working directory to save and create files with both the Interpreter and Filesystem tools.
    The interpreter has numpy, pandas, matplotlib and seaborn already installed
    """,
    servers=["brave", "interpreter", "filesystem", "fetch"],
)
async def main():
    research_prompt = """
    Produce an investment report for the company Eutelsat. The final report should be saved in the filesystem in markdown format, and
    contain at least the following:
    1 - A brief description of the company
    2 - Current financial position (find data, create and incorporate charts)
    3 - A PESTLE analysis
    4 - An investment thesis for the next 3 years. Include both 'buy side' and 'sell side' arguments, and a final
    summary and recommendation.
    Todays date is 15 February 2025. Include the main data sources consulted in presenting the report."""  # noqa: F841

    async with agents.run() as agent:
        await agent.prompt()

        # await agent.prompt(default="STOP")
        # await agent.prompt(default=research_prompt)


if __name__ == "__main__":
    asyncio.run(main())
File without changes

mcp_agent/telemetry/usage_tracking.py
@@ -0,0 +1,18 @@
import logging
from mcp_agent.config import get_settings

logger = logging.getLogger(__name__)


def send_usage_data():
    config = get_settings()
    if not config.usage_telemetry.enabled:
        logger.info("Usage tracking is disabled")
        return

    # TODO: saqadri - implement usage tracking
    # data = {"installation_id": str(uuid.uuid4()), "version": "0.1.0"}
    # try:
    #     requests.post("https://telemetry.example.com/usage", json=data, timeout=2)
    # except:
    #     pass
File without changes
File without changes

mcp_agent/workflows/embedding/embedding_base.py
@@ -0,0 +1,61 @@
from abc import ABC, abstractmethod
from typing import Dict, List

from numpy import float32
from numpy.typing import NDArray
from sklearn.metrics.pairwise import cosine_similarity

from mcp_agent.context_dependent import ContextDependent


FloatArray = NDArray[float32]


class EmbeddingModel(ABC, ContextDependent):
    """Abstract interface for embedding models"""

    @abstractmethod
    async def embed(self, data: List[str]) -> FloatArray:
        """
        Generate embeddings for a list of messages

        Args:
            data: List of text strings to embed

        Returns:
            Array of embeddings, shape (len(texts), embedding_dim)
        """

    @property
    @abstractmethod
    def embedding_dim(self) -> int:
        """Return the dimensionality of the embeddings"""


def compute_similarity_scores(
    embedding_a: FloatArray, embedding_b: FloatArray
) -> Dict[str, float]:
    """
    Compute different similarity metrics between embeddings
    """
    # Reshape for sklearn's cosine_similarity
    a_emb = embedding_a.reshape(1, -1)
    b_emb = embedding_b.reshape(1, -1)

    cosine_sim = float(cosine_similarity(a_emb, b_emb)[0, 0])

    # Could add other similarity metrics here
    return {
        "cosine": cosine_sim,
        # "euclidean": float(euclidean_similarity),
        # "dot_product": float(dot_product)
    }


def compute_confidence(similarity_scores: Dict[str, float]) -> float:
    """
    Compute overall confidence score from individual similarity metrics
    """
    # For now, just use cosine similarity as confidence
    # Could implement more sophisticated combination of scores
    return similarity_scores["cosine"]
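
The two module-level helpers above can be exercised directly on raw vectors. A minimal illustrative sketch (not part of the package diff) using two toy numpy embeddings:

    import numpy as np

    from mcp_agent.workflows.embedding.embedding_base import (
        compute_confidence,
        compute_similarity_scores,
    )

    # Two toy 4-dimensional "embeddings"
    a = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)
    b = np.array([0.1, 0.25, 0.28, 0.41], dtype=np.float32)

    scores = compute_similarity_scores(a, b)  # {"cosine": <float>}
    print(compute_confidence(scores))         # currently just the cosine score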

mcp_agent/workflows/embedding/embedding_cohere.py
@@ -0,0 +1,49 @@
from typing import List, Optional, TYPE_CHECKING

from cohere import Client
from numpy import array, float32

from mcp_agent.workflows.embedding.embedding_base import EmbeddingModel, FloatArray

if TYPE_CHECKING:
    from mcp_agent.context import Context


class CohereEmbeddingModel(EmbeddingModel):
    """Cohere embedding model implementation"""

    def __init__(
        self,
        model: str = "embed-multilingual-v3.0",
        context: Optional["Context"] = None,
        **kwargs,
    ):
        super().__init__(context=context, **kwargs)
        self.client = Client(api_key=self.context.config.cohere.api_key)
        self.model = model
        # Cache the dimension since it's fixed per model
        # https://docs.cohere.com/v2/docs/cohere-embed
        self._embedding_dim = {
            "embed-english-v2.0": 4096,
            "embed-english-light-v2.0": 1024,
            "embed-english-v3.0": 1024,
            "embed-english-light-v3.0": 384,
            "embed-multilingual-v2.0": 768,
            "embed-multilingual-v3.0": 1024,
            "embed-multilingual-light-v3.0": 384,
        }[model]

    async def embed(self, data: List[str]) -> FloatArray:
        response = self.client.embed(
            texts=data,
            model=self.model,
            input_type="classification",
            embedding_types=["float"],
        )

        embeddings = array(response.embeddings, dtype=float32)
        return embeddings

    @property
    def embedding_dim(self) -> int:
        return self._embedding_dim

mcp_agent/workflows/embedding/embedding_openai.py
@@ -0,0 +1,46 @@
from typing import List, Optional, TYPE_CHECKING

from numpy import array, float32, stack
from openai import OpenAI

from mcp_agent.workflows.embedding.embedding_base import EmbeddingModel, FloatArray

if TYPE_CHECKING:
    from mcp_agent.context import Context


class OpenAIEmbeddingModel(EmbeddingModel):
    """OpenAI embedding model implementation"""

    def __init__(
        self, model: str = "text-embedding-3-small", context: Optional["Context"] = None
    ):
        super().__init__(context=context)
        self.client = OpenAI(api_key=self.context.config.openai.api_key)
        self.model = model
        # Cache the dimension since it's fixed per model
        self._embedding_dim = {
            "text-embedding-3-small": 1536,
            "text-embedding-3-large": 3072,
        }[model]

    async def embed(self, data: List[str]) -> FloatArray:
        response = self.client.embeddings.create(
            model=self.model, input=data, encoding_format="float"
        )

        # Sort the embeddings by their index to ensure correct order
        sorted_embeddings = sorted(response.data, key=lambda x: x["index"])

        # Stack all embeddings into a single array
        embeddings = stack(
            [
                array(embedding["embedding"], dtype=float32)
                for embedding in sorted_embeddings
            ]
        )
        return embeddings

    @property
    def embedding_dim(self) -> int:
        return self._embedding_dim
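
Both concrete models above implement the same EmbeddingModel interface, so callers can stay provider-agnostic. An illustrative sketch (not part of the package diff; it assumes a valid OpenAI API key is available through the surrounding mcp_agent configuration) that scores two texts with the base-module helpers:

    from mcp_agent.workflows.embedding.embedding_base import (
        compute_confidence,
        compute_similarity_scores,
    )
    from mcp_agent.workflows.embedding.embedding_openai import OpenAIEmbeddingModel


    async def text_similarity(text_a: str, text_b: str) -> float:
        # CohereEmbeddingModel would work identically here
        model = OpenAIEmbeddingModel()
        embeddings = await model.embed([text_a, text_b])  # shape (2, embedding_dim)
        scores = compute_similarity_scores(embeddings[0], embeddings[1])
        return compute_confidence(scores)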
File without changes

mcp_agent/workflows/evaluator_optimizer/evaluator_optimizer.py
@@ -0,0 +1,359 @@
import contextlib
from enum import Enum
from typing import Callable, List, Optional, Type, TYPE_CHECKING
from pydantic import BaseModel, Field

from mcp_agent.workflows.llm.augmented_llm import (
    AugmentedLLM,
    MessageParamT,
    MessageT,
    ModelT,
    RequestParams,
)
from mcp_agent.agents.agent import Agent
from mcp_agent.logging.logger import get_logger

if TYPE_CHECKING:
    from mcp_agent.context import Context

logger = get_logger(__name__)


class QualityRating(str, Enum):
    """Enum for evaluation quality ratings"""

    POOR = 0  # Major improvements needed
    FAIR = 1  # Several improvements needed
    GOOD = 2  # Minor improvements possible
    EXCELLENT = 3  # No improvements needed


class EvaluationResult(BaseModel):
    """Model representing the evaluation result from the evaluator LLM"""

    rating: QualityRating = Field(description="Quality rating of the response")
    feedback: str = Field(
        description="Specific feedback and suggestions for improvement"
    )
    needs_improvement: bool = Field(
        description="Whether the output needs further improvement"
    )
    focus_areas: List[str] = Field(
        default_factory=list, description="Specific areas to focus on in next iteration"
    )


class EvaluatorOptimizerLLM(AugmentedLLM[MessageParamT, MessageT]):
    """
    Implementation of the evaluator-optimizer workflow where one LLM generates responses
    while another provides evaluation and feedback in a refinement loop.

    This can be used either:
    1. As a standalone workflow with its own optimizer agent
    2. As a wrapper around another workflow (Orchestrator, Router, ParallelLLM) to add
       evaluation and refinement capabilities

    When to use this workflow:
    - When you have clear evaluation criteria and iterative refinement provides value
    - When LLM responses improve with articulated feedback
    - When the task benefits from focused iteration on specific aspects

    Examples:
    - Literary translation with "expert" refinement
    - Complex search tasks needing multiple rounds
    - Document writing requiring multiple revisions
    """

    def __init__(
        self,
        optimizer: Agent | AugmentedLLM,
        evaluator: str | Agent | AugmentedLLM,
        min_rating: QualityRating = QualityRating.GOOD,
        max_refinements: int = 3,
        llm_factory: Callable[[Agent], AugmentedLLM] | None = None,  # TODO: Remove legacy - factory should only be needed for str evaluator
        context: Optional["Context"] = None,
    ):
        """
        Initialize the evaluator-optimizer workflow.

        Args:
            optimizer: The agent/LLM/workflow that generates responses. Can be:
                - An Agent that will be converted to an AugmentedLLM
                - An AugmentedLLM instance
                - An Orchestrator/Router/ParallelLLM workflow
            evaluator_agent: The agent/LLM that evaluates responses
            evaluation_criteria: Criteria for the evaluator to assess responses
            min_rating: Minimum acceptable quality rating
            max_refinements: Maximum refinement iterations
            llm_factory: Optional factory to create LLMs from agents
        """
        super().__init__(context=context)

        # Set up the optimizer
        self.name = optimizer.name
        self.llm_factory = llm_factory
        self.optimizer = optimizer
        self.evaluator = evaluator

        # TODO: Remove legacy - optimizer should always be an AugmentedLLM, no conversion needed
        if isinstance(optimizer, Agent):
            if not llm_factory:
                raise ValueError("llm_factory is required when using an Agent")

            # Only create new LLM if agent doesn't have one
            if hasattr(optimizer, "_llm") and optimizer._llm:
                self.optimizer_llm = optimizer._llm
            else:
                self.optimizer_llm = llm_factory(agent=optimizer)

            self.aggregator = optimizer
            self.instruction = (
                optimizer.instruction
                if isinstance(optimizer.instruction, str)
                else None
            )

        elif isinstance(optimizer, AugmentedLLM):
            self.optimizer_llm = optimizer
            self.aggregator = optimizer.aggregator
            self.instruction = optimizer.instruction

        else:
            raise ValueError(f"Unsupported optimizer type: {type(optimizer)}")

        self.history = self.optimizer_llm.history

        # Set up the evaluator
        if isinstance(evaluator, AugmentedLLM):
            self.evaluator_llm = evaluator
        # TODO: Remove legacy - evaluator should be either AugmentedLLM or str
        elif isinstance(evaluator, Agent):
            if not llm_factory:
                raise ValueError(
                    "llm_factory is required when using an Agent evaluator"
                )

            # Only create new LLM if agent doesn't have one
            if hasattr(evaluator, "_llm") and evaluator._llm:
                self.evaluator_llm = evaluator._llm
            else:
                self.evaluator_llm = llm_factory(agent=evaluator)
        elif isinstance(evaluator, str):
            # If a string is passed as the evaluator, we use it as the evaluation criteria
            # and create an evaluator agent with that instruction
            if not llm_factory:
                raise ValueError(
                    "llm_factory is required when using a string evaluator"
                )

            self.evaluator_llm = llm_factory(
                agent=Agent(name="Evaluator", instruction=evaluator)
            )
        else:
            raise ValueError(f"Unsupported evaluator type: {type(evaluator)}")

        self.min_rating = min_rating
        self.max_refinements = max_refinements

        # Track iteration history
        self.refinement_history = []

    async def generate(
        self,
        message: str | MessageParamT | List[MessageParamT],
        request_params: RequestParams | None = None,
    ) -> List[MessageT]:
        """Generate an optimized response through evaluation-guided refinement"""
        refinement_count = 0
        response = None
        best_response = None
        best_rating = QualityRating.POOR
        self.refinement_history = []

        # Initial generation
        async with contextlib.AsyncExitStack() as stack:
            if isinstance(self.optimizer, Agent):
                await stack.enter_async_context(self.optimizer)
            response = await self.optimizer_llm.generate(
                message=message,
                request_params=request_params,
            )

        best_response = response

        while refinement_count < self.max_refinements:
            logger.debug("Optimizer result:", data=response)

            # Evaluate current response
            eval_prompt = self._build_eval_prompt(
                original_request=str(message),
                current_response="\n".join(str(r) for r in response)
                if isinstance(response, list)
                else str(response),
                iteration=refinement_count,
            )

            evaluation_result = None
            async with contextlib.AsyncExitStack() as stack:
                if isinstance(self.evaluator, Agent):
                    await stack.enter_async_context(self.evaluator)

                evaluation_result = await self.evaluator_llm.generate_structured(
                    message=eval_prompt,
                    response_model=EvaluationResult,
                    request_params=request_params,
                )

            # Track iteration
            self.refinement_history.append(
                {
                    "attempt": refinement_count + 1,
                    "response": response,
                    "evaluation_result": evaluation_result,
                }
            )

            logger.debug("Evaluator result:", data=evaluation_result)

            # Track best response (using enum ordering)
            if evaluation_result.rating.value > best_rating.value:
                best_rating = evaluation_result.rating
                best_response = response
                logger.debug(
                    "New best response:",
                    data={"rating": best_rating, "response": best_response},
                )

            # Check if we've reached acceptable quality
            if (
                evaluation_result.rating.value >= self.min_rating.value
                or not evaluation_result.needs_improvement
            ):
                logger.debug(
                    f"Acceptable quality {evaluation_result.rating.value} reached",
                    data={
                        "rating": evaluation_result.rating.value,
                        "needs_improvement": evaluation_result.needs_improvement,
                        "min_rating": self.min_rating.value,
                    },
                )
                break

            # Generate refined response
            refinement_prompt = self._build_refinement_prompt(
                original_request=str(message),
                current_response="\n".join(str(r) for r in response)
                if isinstance(response, list)
                else str(response),
                feedback=evaluation_result,
                iteration=refinement_count,
            )

            async with contextlib.AsyncExitStack() as stack:
                if isinstance(self.optimizer, Agent):
                    await stack.enter_async_context(self.optimizer)

                response = await self.optimizer_llm.generate(
                    message=refinement_prompt,
                    request_params=request_params,
                )

            refinement_count += 1

        return best_response

    async def generate_str(
        self,
        message: str | MessageParamT | List[MessageParamT],
        request_params: RequestParams | None = None,
    ) -> str:
        """Generate an optimized response and return it as a string"""
        response = await self.generate(
            message=message,
            request_params=request_params,
        )

        # Handle case where response is a single message
        if not isinstance(response, list):
            return str(response)

        # Convert all messages to strings, handling different message types
        result_strings = []
        for r in response:
            if hasattr(r, 'text'):
                result_strings.append(r.text)
            elif hasattr(r, 'content'):
                # Handle ToolUseBlock and similar
                if isinstance(r.content, list):
                    # Typically content is a list of blocks
                    result_strings.extend(str(block) for block in r.content)
                else:
                    result_strings.append(str(r.content))
            else:
                # Fallback to string representation
                result_strings.append(str(r))

        return "\n".join(result_strings)

    async def generate_structured(
        self,
        message: str | MessageParamT | List[MessageParamT],
        response_model: Type[ModelT],
        request_params: RequestParams | None = None,
    ) -> ModelT:
        """Generate an optimized structured response"""
        response_str = await self.generate_str(
            message=message, request_params=request_params
        )

        return await self.optimizer.generate_structured(
            message=response_str,
            response_model=response_model,
            request_params=request_params,
        )

    def _build_eval_prompt(
        self, original_request: str, current_response: str, iteration: int
    ) -> str:
        """Build the evaluation prompt for the evaluator"""
        return f"""
        Evaluate the following response based on these criteria:
        {self.evaluator.instruction}

        Original Request: {original_request}
        Current Response (Iteration {iteration + 1}): {current_response}

        Provide your evaluation as a structured response with:
        1. A quality rating (EXCELLENT, GOOD, FAIR, or POOR)
        2. Specific feedback and suggestions
        3. Whether improvement is needed (true/false)
        4. Focus areas for improvement

        Rate as EXCELLENT only if no improvements are needed.
        Rate as GOOD if only minor improvements are possible.
        Rate as FAIR if several improvements are needed.
        Rate as POOR if major improvements are needed.
        """

    def _build_refinement_prompt(
        self,
        original_request: str,
        current_response: str,
        feedback: EvaluationResult,
        iteration: int,
    ) -> str:
        """Build the refinement prompt for the optimizer"""
        return f"""
        Improve your previous response based on the evaluation feedback.

        Original Request: {original_request}

        Previous Response (Iteration {iteration + 1}):
        {current_response}

        Quality Rating: {feedback.rating}
        Feedback: {feedback.feedback}
        Areas to Focus On: {", ".join(feedback.focus_areas)}

        Generate an improved version addressing the feedback while maintaining accuracy and relevance.
        """
File without changes