PyPI - finagent-eval - Versions diffs - 1.0.0__py3-none-any.whl - Mend

finagent-eval 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

finagent/__init__.py +81 -0
finagent/_compat.py +8 -0
finagent/adapter/__init__.py +27 -0
finagent/adapter/autogen.py +146 -0
finagent/adapter/crewai.py +142 -0
finagent/adapter/http.py +318 -0
finagent/adapter/langgraph.py +278 -0
finagent/adapter/registry.py +245 -0
finagent/adversarial/__init__.py +57 -0
finagent/adversarial/adversarial.py +744 -0
finagent/adversarial/attacks.py +23 -0
finagent/adversarial/financial.py +585 -0
finagent/adversarial/mutators.py +19 -0
finagent/api/__init__.py +57 -0
finagent/api/app.py +260 -0
finagent/api/middleware/__init__.py +23 -0
finagent/api/middleware/auth.py +132 -0
finagent/api/middleware/ratelimit.py +123 -0
finagent/api/middleware/responsetime.py +263 -0
finagent/api/routes.py +951 -0
finagent/api/schemas.py +220 -0
finagent/api/websocket.py +110 -0
finagent/audit/__init__.py +14 -0
finagent/audit/tool_auditor.py +399 -0
finagent/cli.py +292 -0
finagent/config.py +101 -0
finagent/interface/__init__.py +64 -0
finagent/interface/base.py +248 -0
finagent/interface/exceptions.py +223 -0
finagent/interface/models.py +169 -0
finagent/isolation/__init__.py +15 -0
finagent/isolation/manager.py +200 -0
finagent/isolation/production.py +423 -0
finagent/judge/__init__.py +31 -0
finagent/judge/consensus.py +153 -0
finagent/judge/judge.py +897 -0
finagent/judge/models.py +23 -0
finagent/judge/prompts.py +133 -0
finagent/mcp/__init__.py +19 -0
finagent/mcp/health.py +178 -0
finagent/mcp/manager.py +244 -0
finagent/mcp/restart_policy.py +221 -0
finagent/monitor/__init__.py +9 -0
finagent/monitor/metrics.py +221 -0
finagent/pipeline/__init__.py +66 -0
finagent/pipeline/checkpointer.py +237 -0
finagent/pipeline/distributed_scheduler.py +588 -0
finagent/pipeline/engine.py +214 -0
finagent/pipeline/nodes.py +356 -0
finagent/pipeline/pipeline.py +1719 -0
finagent/pipeline/quota.py +124 -0
finagent/pipeline/scheduler.py +237 -0
finagent/report/__init__.py +17 -0
finagent/report/charts.py +427 -0
finagent/report/generator.py +291 -0
finagent/scoring/__init__.py +87 -0
finagent/scoring/aggregator.py +12 -0
finagent/scoring/engine.py +1294 -0
finagent/scoring/llm_judge_scorer.py +139 -0
finagent/scoring/metrics.py +35 -0
finagent/scoring/rater.py +13 -0
finagent/scoring/rules.py +301 -0
finagent/scoring/trading_performance.py +334 -0
finagent/scoring/veto.py +464 -0
finagent/taskgen/__init__.py +34 -0
finagent/taskgen/datasets.py +28 -0
finagent/taskgen/generator.py +1308 -0
finagent/taskgen/sampler.py +12 -0
finagent/tracing/__init__.py +3 -0
finagent/tracing/langsmith.py +476 -0
finagent/utils/__init__.py +3 -0
finagent/utils/logging.py +120 -0
finagent_eval-1.0.0.dist-info/METADATA +494 -0
finagent_eval-1.0.0.dist-info/RECORD +78 -0
finagent_eval-1.0.0.dist-info/WHEEL +5 -0
finagent_eval-1.0.0.dist-info/entry_points.txt +2 -0
finagent_eval-1.0.0.dist-info/licenses/LICENSE +21 -0
finagent_eval-1.0.0.dist-info/top_level.txt +1 -0

finagent/__init__.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""
+FinAgent-Eval: an evaluation system for financial AI agents.
+A standardized evaluation framework for financial AI agents, supporting multi-dimensional scoring, adversarial testing, and automated evaluation workflows.
+"""
+__version__ = "1.0.0"
+__author__ = "FinAgent Team"
+from .adapter import (
+    AdapterRegistry,
+    HTTPAdapter,
+    LangGraphAdapter,
+)
+from .adversarial import (
+    AdversarialConfig,
+    AdversarialTester,
+)
+from .interface import (
+    AgentConfig,
+    AgentType,
+    DifficultyLevel,
+    EvalDimension,
+    EvalMode,
+    EvalResponse,
+    EvalStatus,
+    EvalTask,
+    FinancialAgentInterface,
+    TaskType,
+)
+from .judge import (
+    JudgeConfig,
+    LLMJudge,
+)
+from .pipeline import (
+    EvalPipeline,
+    PipelineConfig,
+)
+from .scoring import (
+    RatingLevel,
+    ScoringConfig,
+    ScoringEngine,
+)
+from .taskgen import (
+    EvalTaskGenerator,
+    TaskGeneratorConfig,
+)
+# Public names exported by ``from finagent import *``; every entry is imported
+# above from one of the subpackages and the groups below follow those subpackages.
+__all__ = [
+    # Interface
+    "FinancialAgentInterface",
+    "AgentConfig",
+    "EvalTask",
+    "EvalResponse",
+    "EvalMode",
+    "EvalStatus",
+    "EvalDimension",
+    "DifficultyLevel",
+    "TaskType",
+    "AgentType",
+    # Adapter
+    "LangGraphAdapter",
+    "HTTPAdapter",
+    "AdapterRegistry",
+    # TaskGen
+    "EvalTaskGenerator",
+    "TaskGeneratorConfig",
+    # Scoring
+    "ScoringEngine",
+    "ScoringConfig",
+    "RatingLevel",
+    # Pipeline
+    "EvalPipeline",
+    "PipelineConfig",
+    # Judge
+    "LLMJudge",
+    "JudgeConfig",
+    # Adversarial
+    "AdversarialTester",
+    "AdversarialConfig",
+]

finagent/_compat.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Python version compatibility utilities."""
+import enum
+class StrEnum(str, enum.Enum):  # noqa: UP042
+    """String-valued Enum base usable on Python 3.10 (the stdlib StrEnum arrived in 3.11)."""
+    def __str__(self) -> str:
+        # Render a member as its underlying string value instead of "Class.MEMBER".
+        member_value: str = self.value
+        return member_value

finagent/adapter/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+Framework adapter module.
+Requirement: FR-002 framework adapters.
+Lets agents built on several frameworks plug into the evaluation system:
+- LangGraphAdapter: supports the LangGraph framework
+- HTTPAdapter: supports connecting any agent through an HTTP API
+- AutoGenAdapter: supports the AutoGen framework
+- CrewAIAdapter: supports the CrewAI framework
+- AdapterRegistry: registers new framework adapters at runtime
+"""
+from .autogen import AutoGenAdapter
+from .crewai import CrewAIAdapter
+from .http import HTTPAdapter
+from .langgraph import LangGraphAdapter
+from .registry import AdapterRegistry, registry
+__all__ = [
+    "LangGraphAdapter",
+    "HTTPAdapter",
+    "AutoGenAdapter",
+    "CrewAIAdapter",
+    "AdapterRegistry",
+    "registry",
+]

finagent/adapter/autogen.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""
+AutoGen adapter.
+Adapts agents built with the AutoGen framework to FinancialAgentInterface.
+"""
+import asyncio
+import logging
+from collections.abc import AsyncIterator
+from typing import Any
+from ..interface.base import FinancialAgentInterface
+from ..interface.models import (
+    AgentConfig,
+    AgentState,
+    EvalResponse,
+    EvalTask,
+)
+logger = logging.getLogger(__name__)
+class AutoGenAdapter(FinancialAgentInterface):
+    """
+    Adapter exposing an AutoGen agent through FinancialAgentInterface.
+    The wrapped object only needs to provide ``initiate_chat``; ``clear_history``
+    and ``reset`` are used by :meth:`reset` when they exist.
+    """
+    def __init__(
+        self,
+        agent,
+        config: AgentConfig | None = None,
+    ):
+        """
+        Args:
+            agent: AutoGen object whose ``initiate_chat`` runs the conversation.
+            config: Adapter metadata; a generic "autogen-agent" config is built when omitted.
+        """
+        self._agent = agent
+        self._config = config or AgentConfig(
+            agent_name="autogen-agent",
+            agent_type="autogen",
+            version="0.1.0",
+            framework="autogen",
+            llm_backend="unknown",
+        )
+        # Execution traces keyed by task id.
+        # NOTE(review): nothing in this class writes to this mapping, so get_trace()
+        # always returns None today — confirm whether ainvoke() should record traces.
+        self._traces: dict[str, dict] = {}
+    def get_config(self) -> AgentConfig:
+        """Return the configuration this adapter was created with."""
+        return self._config
+    async def ainvoke(self, task: EvalTask) -> EvalResponse:
+        """
+        Run a single task through the wrapped AutoGen agent.
+        Failures are logged and reported through ``EvalResponse.error`` instead of
+        being raised, so one failing task cannot abort a batch.
+        """
+        try:
+            # Build the chat message from task.input_data.
+            message = self._build_input_content(task)
+            # NOTE(review): initiate_chat is synchronous, so the event loop is blocked
+            # for the whole conversation and abatch() effectively runs tasks one by one.
+            # Moving it to a worker thread needs a decision on agent thread-safety.
+            result = self._agent.initiate_chat(
+                message=message,
+                max_turns=10,
+                summary_method="last_msg",
+            )
+            return EvalResponse(
+                task_id=task.task_id,
+                output=self._extract_output(result),
+                tool_calls=self._extract_tool_calls(result),
+            )
+        except Exception as e:
+            # Keep the traceback in the logs; the response only carries the message.
+            logger.exception("AutoGenAdapter: task %s failed", task.task_id)
+            return EvalResponse(
+                task_id=task.task_id,
+                output="",
+                error=f"AutoGen执行失败: {str(e)}",
+            )
+    async def abatch(self, tasks: list[EvalTask]) -> list[EvalResponse]:
+        """Invoke every task via ainvoke() and return the responses in input order."""
+        return await asyncio.gather(*[self.ainvoke(task) for task in tasks])
+    async def astream(self, task: EvalTask) -> AsyncIterator[dict]:
+        """
+        Pseudo-streaming: run the task to completion, then yield one "message" event.
+        (No native AutoGen streaming is used here.)
+        """
+        response = await self.ainvoke(task)
+        yield {
+            "event": "message",
+            "data": {"content": response.output or ""},
+            "task_id": task.task_id,
+        }
+    def get_state(self) -> AgentState:
+        """Return a static "idle" state tagged with the framework name."""
+        return AgentState(
+            status="idle",
+            metadata={"framework": "autogen"},
+        )
+    def reset(self, scope: str = "all") -> None:
+        """
+        Clear stored traces and reset the wrapped agent when it supports it.
+        ``scope`` is only logged; every call performs the same full reset.
+        """
+        logger.info("AutoGenAdapter: 重置适配器状态 (scope=%s)", scope)
+        self._traces.clear()
+        if hasattr(self._agent, "clear_history"):
+            self._agent.clear_history()
+        if hasattr(self._agent, "reset"):
+            self._agent.reset()
+    def serialize_state(self, state: AgentState) -> bytes:
+        """Serialize ``state`` to UTF-8 encoded JSON bytes."""
+        import json
+        return json.dumps(state.model_dump()).encode('utf-8')
+    def deserialize_state(self, data: bytes) -> AgentState:
+        """Rebuild an AgentState from bytes produced by serialize_state()."""
+        import json
+        return AgentState(**json.loads(data.decode('utf-8')))
+    def get_trace(self, task_id: str) -> dict | None:
+        """Return the stored execution trace for ``task_id``, or None when absent."""
+        return self._traces.get(task_id)
+    def _build_input_content(self, task: EvalTask) -> str:
+        """Pick the prompt text from task.input_data: "question", then "instruction", else the whole mapping as text."""
+        if "question" in task.input_data:
+            return task.input_data["question"]
+        elif "instruction" in task.input_data:
+            return task.input_data["instruction"]
+        else:
+            return str(task.input_data)
+    @staticmethod
+    def _get_chat_history(result: Any) -> list:
+        """
+        Return the message list of a chat result, or [] when there is none.
+        Accepts both a plain dict with a "chat_history" key and an object with a
+        ``chat_history`` attribute (the shape of AutoGen's documented ChatResult).
+        """
+        if isinstance(result, dict):
+            history = result.get("chat_history")
+        else:
+            history = getattr(result, "chat_history", None)
+        return list(history) if isinstance(history, (list, tuple)) else []
+    def _extract_output(self, result: Any) -> Any:
+        """
+        Derive the final answer text from whatever ``initiate_chat`` returned.
+        Order of preference: the string itself, the content of the last chat
+        message, a non-empty ``summary`` attribute, then ``str(result)``.
+        """
+        if isinstance(result, str):
+            return result
+        history = self._get_chat_history(result)
+        if history:
+            last = history[-1]
+            content = last.get("content") if isinstance(last, dict) else getattr(last, "content", None)
+            # A last message without text (content missing or None) falls through.
+            if content is not None:
+                return content
+        summary = getattr(result, "summary", None)
+        if isinstance(summary, str) and summary:
+            return summary
+        # A non-empty history without usable text yields "", never a repr dump.
+        return "" if history else str(result)
+    def _extract_tool_calls(self, result: Any) -> list[dict]:
+        """
+        Collect tool calls from every message in the chat history.
+        Each entry is ``{"name", "args", "success"}``; ``success`` is always True
+        because the chat history is not inspected for tool failures here.
+        """
+        tool_calls = []
+        for msg in self._get_chat_history(result):
+            if not isinstance(msg, dict):
+                continue
+            for tc in msg.get("tool_calls") or []:
+                function = tc.get("function") or {}
+                tool_calls.append({
+                    "name": function.get("name", ""),
+                    "args": function.get("arguments", {}),
+                    "success": True,
+                })
+        return tool_calls

finagent/adapter/crewai.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""
+CrewAI adapter.
+Adapts agents built with the CrewAI framework to FinancialAgentInterface.
+"""
+import asyncio
+import logging
+from collections.abc import AsyncIterator
+from typing import Any
+from ..interface.base import FinancialAgentInterface
+from ..interface.models import (
+    AgentConfig,
+    AgentState,
+    EvalResponse,
+    EvalTask,
+)
+logger = logging.getLogger(__name__)
+class CrewAIAdapter(FinancialAgentInterface):
+    """
+    Adapter exposing a CrewAI crew through FinancialAgentInterface.
+    The wrapped object only needs to provide ``kickoff``; memory-reset hooks are
+    used by :meth:`reset` when they exist.
+    """
+    def __init__(
+        self,
+        crew,
+        config: AgentConfig | None = None,
+    ):
+        """
+        Args:
+            crew: CrewAI object whose ``kickoff`` executes the task.
+            config: Adapter metadata; a generic "crewai-agent" config is built when omitted.
+        """
+        self._crew = crew
+        self._config = config or AgentConfig(
+            agent_name="crewai-agent",
+            agent_type="crewai",
+            version="0.1.0",
+            framework="crewai",
+            llm_backend="unknown",
+        )
+        # Execution traces keyed by task id.
+        # NOTE(review): nothing in this class writes to this mapping, so get_trace()
+        # always returns None today — confirm whether ainvoke() should record traces.
+        self._traces: dict[str, dict] = {}
+    def get_config(self) -> AgentConfig:
+        """Return the configuration this adapter was created with."""
+        return self._config
+    async def ainvoke(self, task: EvalTask) -> EvalResponse:
+        """
+        Run a single task through the wrapped crew.
+        Failures are logged and reported through ``EvalResponse.error`` instead of
+        being raised, so one failing task cannot abort a batch.
+        """
+        try:
+            # Build the query text from task.input_data.
+            message = self._build_input_content(task)
+            # NOTE(review): kickoff is synchronous, so the event loop is blocked until
+            # the crew finishes and abatch() effectively runs tasks one by one.
+            result = self._crew.kickoff(inputs={"query": message})
+            if isinstance(result, dict):
+                output = result.get("result", str(result))
+            elif isinstance(result, str):
+                output = result
+            else:
+                output = str(result)
+            return EvalResponse(
+                task_id=task.task_id,
+                output=output,
+                tool_calls=self._extract_tool_calls(result),
+            )
+        except Exception as e:
+            # Keep the traceback in the logs; the response only carries the message.
+            logger.exception("CrewAIAdapter: task %s failed", task.task_id)
+            return EvalResponse(
+                task_id=task.task_id,
+                output="",
+                error=f"CrewAI执行失败: {str(e)}",
+            )
+    async def abatch(self, tasks: list[EvalTask]) -> list[EvalResponse]:
+        """Invoke every task via ainvoke() and return the responses in input order."""
+        return await asyncio.gather(*[self.ainvoke(task) for task in tasks])
+    async def astream(self, task: EvalTask) -> AsyncIterator[dict]:
+        """Pseudo-streaming: run the task to completion, then yield one "message" event."""
+        response = await self.ainvoke(task)
+        yield {
+            "event": "message",
+            "data": {"content": response.output or ""},
+            "task_id": task.task_id,
+        }
+    def get_state(self) -> AgentState:
+        """Return a static "idle" state tagged with the framework name."""
+        return AgentState(
+            status="idle",
+            metadata={"framework": "crewai"},
+        )
+    def reset(self, scope: str = "all") -> None:
+        """
+        Clear stored traces and reset crew/agent memory where supported.
+        ``scope`` is only logged; every call performs the same full reset.
+        """
+        logger.info("CrewAIAdapter: 重置适配器状态 (scope=%s)", scope)
+        self._traces.clear()
+        if hasattr(self._crew, "reset_memory"):
+            self._crew.reset_memory()
+        # Visit the crew's members under either attribute name: CrewAI's documented
+        # Crew exposes ``agents``; ``agent`` is kept for wrappers that use the singular.
+        seen: set[int] = set()
+        for attr in ("agents", "agent"):
+            members = getattr(self._crew, attr, None)
+            if members is None:
+                continue
+            if not isinstance(members, (list, tuple)):
+                members = [members]
+            for agent in members:
+                # Avoid resetting the same agent twice when both attributes exist.
+                if id(agent) in seen:
+                    continue
+                seen.add(id(agent))
+                if hasattr(agent, "reset_memory"):
+                    agent.reset_memory()
+    def serialize_state(self, state: AgentState) -> bytes:
+        """Serialize ``state`` to UTF-8 encoded JSON bytes."""
+        import json
+        return json.dumps(state.model_dump()).encode('utf-8')
+    def deserialize_state(self, data: bytes) -> AgentState:
+        """Rebuild an AgentState from bytes produced by serialize_state()."""
+        import json
+        return AgentState(**json.loads(data.decode('utf-8')))
+    def get_trace(self, task_id: str) -> dict | None:
+        """Return the stored execution trace for ``task_id``, or None when absent."""
+        return self._traces.get(task_id)
+    def _build_input_content(self, task: EvalTask) -> str:
+        """Pick the prompt text from task.input_data: "question", then "instruction", else the whole mapping as text."""
+        if "question" in task.input_data:
+            return task.input_data["question"]
+        elif "instruction" in task.input_data:
+            return task.input_data["instruction"]
+        else:
+            return str(task.input_data)
+    def _extract_tool_calls(self, result: Any) -> list[dict]:
+        """
+        Collect tool calls from the per-task outputs of a crew result.
+        ``tasks_output`` is read from a dict key or from an attribute, so both plain
+        dict results and result objects (such as CrewAI's CrewOutput) are covered.
+        Each entry is ``{"name", "args", "success"}``; ``success`` is always True.
+        """
+        if isinstance(result, dict):
+            tasks_output = result.get("tasks_output")
+        else:
+            tasks_output = getattr(result, "tasks_output", None)
+        tool_calls = []
+        for task_output in tasks_output or []:
+            # NOTE(review): presumably ``tools_output`` is attached by the crew setup —
+            # verify that the task outputs in use actually expose it.
+            for tool_out in getattr(task_output, "tools_output", None) or []:
+                tool_calls.append({
+                    "name": getattr(tool_out, "tool_name", ""),
+                    "args": getattr(tool_out, "args", {}),
+                    "success": True,
+                })
+        return tool_calls