finagent-eval 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. finagent/__init__.py +81 -0
  2. finagent/_compat.py +8 -0
  3. finagent/adapter/__init__.py +27 -0
  4. finagent/adapter/autogen.py +146 -0
  5. finagent/adapter/crewai.py +142 -0
  6. finagent/adapter/http.py +318 -0
  7. finagent/adapter/langgraph.py +278 -0
  8. finagent/adapter/registry.py +245 -0
  9. finagent/adversarial/__init__.py +57 -0
  10. finagent/adversarial/adversarial.py +744 -0
  11. finagent/adversarial/attacks.py +23 -0
  12. finagent/adversarial/financial.py +585 -0
  13. finagent/adversarial/mutators.py +19 -0
  14. finagent/api/__init__.py +57 -0
  15. finagent/api/app.py +260 -0
  16. finagent/api/middleware/__init__.py +23 -0
  17. finagent/api/middleware/auth.py +132 -0
  18. finagent/api/middleware/ratelimit.py +123 -0
  19. finagent/api/middleware/responsetime.py +263 -0
  20. finagent/api/routes.py +951 -0
  21. finagent/api/schemas.py +220 -0
  22. finagent/api/websocket.py +110 -0
  23. finagent/audit/__init__.py +14 -0
  24. finagent/audit/tool_auditor.py +399 -0
  25. finagent/cli.py +292 -0
  26. finagent/config.py +101 -0
  27. finagent/interface/__init__.py +64 -0
  28. finagent/interface/base.py +248 -0
  29. finagent/interface/exceptions.py +223 -0
  30. finagent/interface/models.py +169 -0
  31. finagent/isolation/__init__.py +15 -0
  32. finagent/isolation/manager.py +200 -0
  33. finagent/isolation/production.py +423 -0
  34. finagent/judge/__init__.py +31 -0
  35. finagent/judge/consensus.py +153 -0
  36. finagent/judge/judge.py +897 -0
  37. finagent/judge/models.py +23 -0
  38. finagent/judge/prompts.py +133 -0
  39. finagent/mcp/__init__.py +19 -0
  40. finagent/mcp/health.py +178 -0
  41. finagent/mcp/manager.py +244 -0
  42. finagent/mcp/restart_policy.py +221 -0
  43. finagent/monitor/__init__.py +9 -0
  44. finagent/monitor/metrics.py +221 -0
  45. finagent/pipeline/__init__.py +66 -0
  46. finagent/pipeline/checkpointer.py +237 -0
  47. finagent/pipeline/distributed_scheduler.py +588 -0
  48. finagent/pipeline/engine.py +214 -0
  49. finagent/pipeline/nodes.py +356 -0
  50. finagent/pipeline/pipeline.py +1719 -0
  51. finagent/pipeline/quota.py +124 -0
  52. finagent/pipeline/scheduler.py +237 -0
  53. finagent/report/__init__.py +17 -0
  54. finagent/report/charts.py +427 -0
  55. finagent/report/generator.py +291 -0
  56. finagent/scoring/__init__.py +87 -0
  57. finagent/scoring/aggregator.py +12 -0
  58. finagent/scoring/engine.py +1294 -0
  59. finagent/scoring/llm_judge_scorer.py +139 -0
  60. finagent/scoring/metrics.py +35 -0
  61. finagent/scoring/rater.py +13 -0
  62. finagent/scoring/rules.py +301 -0
  63. finagent/scoring/trading_performance.py +334 -0
  64. finagent/scoring/veto.py +464 -0
  65. finagent/taskgen/__init__.py +34 -0
  66. finagent/taskgen/datasets.py +28 -0
  67. finagent/taskgen/generator.py +1308 -0
  68. finagent/taskgen/sampler.py +12 -0
  69. finagent/tracing/__init__.py +3 -0
  70. finagent/tracing/langsmith.py +476 -0
  71. finagent/utils/__init__.py +3 -0
  72. finagent/utils/logging.py +120 -0
  73. finagent_eval-1.0.0.dist-info/METADATA +494 -0
  74. finagent_eval-1.0.0.dist-info/RECORD +78 -0
  75. finagent_eval-1.0.0.dist-info/WHEEL +5 -0
  76. finagent_eval-1.0.0.dist-info/entry_points.txt +2 -0
  77. finagent_eval-1.0.0.dist-info/licenses/LICENSE +21 -0
  78. finagent_eval-1.0.0.dist-info/top_level.txt +1 -0
finagent/__init__.py ADDED
@@ -0,0 +1,81 @@
1
+ """
2
+ FinAgent-Eval: 金融AI Agent评测系统
3
+
4
+ 一个标准化的金融AI Agent评测框架,支持多维度评分、对抗性测试和自动化评测流程。
5
+ """
6
+
7
+ __version__ = "1.0.0"
8
+ __author__ = "FinAgent Team"
9
+
10
+ from .adapter import (
11
+ AdapterRegistry,
12
+ HTTPAdapter,
13
+ LangGraphAdapter,
14
+ )
15
+ from .adversarial import (
16
+ AdversarialConfig,
17
+ AdversarialTester,
18
+ )
19
+ from .interface import (
20
+ AgentConfig,
21
+ AgentType,
22
+ DifficultyLevel,
23
+ EvalDimension,
24
+ EvalMode,
25
+ EvalResponse,
26
+ EvalStatus,
27
+ EvalTask,
28
+ FinancialAgentInterface,
29
+ TaskType,
30
+ )
31
+ from .judge import (
32
+ JudgeConfig,
33
+ LLMJudge,
34
+ )
35
+ from .pipeline import (
36
+ EvalPipeline,
37
+ PipelineConfig,
38
+ )
39
+ from .scoring import (
40
+ RatingLevel,
41
+ ScoringConfig,
42
+ ScoringEngine,
43
+ )
44
+ from .taskgen import (
45
+ EvalTaskGenerator,
46
+ TaskGeneratorConfig,
47
+ )
48
+
49
+ __all__ = [
50
+ # Interface
51
+ "FinancialAgentInterface",
52
+ "AgentConfig",
53
+ "EvalTask",
54
+ "EvalResponse",
55
+ "EvalMode",
56
+ "EvalStatus",
57
+ "EvalDimension",
58
+ "DifficultyLevel",
59
+ "TaskType",
60
+ "AgentType",
61
+ # Adapter
62
+ "LangGraphAdapter",
63
+ "HTTPAdapter",
64
+ "AdapterRegistry",
65
+ # TaskGen
66
+ "EvalTaskGenerator",
67
+ "TaskGeneratorConfig",
68
+ # Scoring
69
+ "ScoringEngine",
70
+ "ScoringConfig",
71
+ "RatingLevel",
72
+ # Pipeline
73
+ "EvalPipeline",
74
+ "PipelineConfig",
75
+ # Judge
76
+ "LLMJudge",
77
+ "JudgeConfig",
78
+ # Adversarial
79
+ "AdversarialTester",
80
+ "AdversarialConfig",
81
+ ]
finagent/_compat.py ADDED
@@ -0,0 +1,8 @@
1
+ """Python version compatibility utilities."""
2
+ import enum
3
+
4
+
5
class StrEnum(str, enum.Enum):  # noqa: UP042
    """
    Backport-style string enum for interpreters older than Python 3.11.

    Members are real ``str`` instances (via the ``str`` mixin), and ``str(member)``
    yields the member's value instead of the default ``ClassName.MEMBER`` form.
    """

    def __str__(self) -> str:
        # Return the underlying value so string conversion shows the plain value.
        member_value = self.value
        return member_value
finagent/adapter/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """
2
+ 框架适配器模块
3
+
4
+ 对应需求: FR-002 框架适配器
5
+
6
+ 支持多种 Agent 框架接入评测系统:
7
+ - LangGraphAdapter: 支持 LangGraph 框架
8
+ - HTTPAdapter: 支持通过 HTTP API 接入任意 Agent
9
+ - AutoGenAdapter: 支持 AutoGen 框架
10
+ - CrewAIAdapter: 支持 CrewAI 框架
11
+ - AdapterRegistry: 运行时注册新框架适配器
12
+ """
13
+
14
+ from .autogen import AutoGenAdapter
15
+ from .crewai import CrewAIAdapter
16
+ from .http import HTTPAdapter
17
+ from .langgraph import LangGraphAdapter
18
+ from .registry import AdapterRegistry, registry
19
+
20
+ __all__ = [
21
+ "LangGraphAdapter",
22
+ "HTTPAdapter",
23
+ "AutoGenAdapter",
24
+ "CrewAIAdapter",
25
+ "AdapterRegistry",
26
+ "registry",
27
+ ]
finagent/adapter/autogen.py ADDED
@@ -0,0 +1,146 @@
1
+ """
2
+ AutoGen 适配器
3
+
4
+ 将 AutoGen 框架的 Agent 适配为 FinancialAgentInterface。
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ from collections.abc import AsyncIterator
10
+ from typing import Any
11
+
12
+ from ..interface.base import FinancialAgentInterface
13
+ from ..interface.models import (
14
+ AgentConfig,
15
+ AgentState,
16
+ EvalResponse,
17
+ EvalTask,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class AutoGenAdapter(FinancialAgentInterface):
    """
    Adapter that exposes an AutoGen agent through ``FinancialAgentInterface``.

    The wrapped object must provide ``initiate_chat(message=..., max_turns=...,
    summary_method=...)``. Its return value may be a plain ``dict`` with a
    ``chat_history`` list, an object exposing a ``chat_history`` attribute
    (presumably AutoGen's ``ChatResult`` -- verify against the installed
    AutoGen version), a ``str``, or anything else (which is stringified).
    """

    def __init__(
        self,
        agent,
        config: AgentConfig | None = None,
    ):
        """
        Args:
            agent: The AutoGen agent (or compatible object) to wrap.
            config: Optional agent description; a generic AutoGen config is
                built when omitted.
        """
        self._agent = agent
        self._config = config or AgentConfig(
            agent_name="autogen-agent",
            agent_type="autogen",
            version="0.1.0",
            framework="autogen",
            llm_backend="unknown",
        )
        # Execution traces keyed by task id.
        # NOTE(review): nothing in this class writes to this mapping, so
        # get_trace() currently always returns None.
        self._traces: dict[str, dict] = {}

    def get_config(self) -> AgentConfig:
        """Return the configuration describing the wrapped agent."""
        return self._config

    async def ainvoke(self, task: EvalTask) -> EvalResponse:
        """
        Run a single evaluation task against the wrapped agent.

        Any exception raised while building the input, calling the agent or
        parsing its result is logged and converted into an ``EvalResponse``
        with an empty ``output`` and the ``error`` field set; it is never
        propagated to the caller.
        """
        try:
            message = self._build_input_content(task)

            # NOTE(review): initiate_chat is a synchronous call, so it blocks
            # the event loop for the duration of the chat and abatch() gains
            # no real concurrency from asyncio.gather.
            result = self._agent.initiate_chat(
                message=message,
                max_turns=10,
                summary_method="last_msg",
            )

            return EvalResponse(
                task_id=task.task_id,
                output=self._extract_output(result),
                tool_calls=self._extract_tool_calls(result),
            )
        except Exception as e:
            # Keep the traceback in the logs; the response only carries str(e).
            logger.exception("AutoGenAdapter: task %s failed", task.task_id)
            return EvalResponse(
                task_id=task.task_id,
                output="",
                error=f"AutoGen执行失败: {str(e)}",
            )

    async def abatch(self, tasks: list[EvalTask]) -> list[EvalResponse]:
        """Invoke every task via :meth:`ainvoke` and return responses in input order."""
        return await asyncio.gather(*[self.ainvoke(task) for task in tasks])

    async def astream(self, task: EvalTask) -> AsyncIterator[dict]:
        """
        Pseudo-streaming: run the task to completion and yield one ``message`` event.

        No incremental output is produced; the single event carries the full reply.
        """
        response = await self.ainvoke(task)
        yield {
            "event": "message",
            "data": {"content": response.output or ""},
            "task_id": task.task_id,
        }

    def get_state(self) -> AgentState:
        """Return a static ``idle`` state tagged with the framework name."""
        return AgentState(
            status="idle",
            metadata={"framework": "autogen"},
        )

    def reset(self, scope: str = "all") -> None:
        """
        Clear stored traces and reset the wrapped agent where it supports it.

        ``scope`` is only logged; every call performs the same full reset.
        """
        logger.info("AutoGenAdapter: 重置适配器状态 (scope=%s)", scope)
        self._traces.clear()
        if hasattr(self._agent, "clear_history"):
            self._agent.clear_history()
        if hasattr(self._agent, "reset"):
            self._agent.reset()

    def serialize_state(self, state: AgentState) -> bytes:
        """Encode ``state`` as UTF-8 JSON bytes (via ``state.model_dump()``)."""
        import json
        return json.dumps(state.model_dump()).encode('utf-8')

    def deserialize_state(self, data: bytes) -> AgentState:
        """Rebuild an ``AgentState`` from bytes produced by :meth:`serialize_state`."""
        import json
        return AgentState(**json.loads(data.decode('utf-8')))

    def get_trace(self, task_id: str) -> dict | None:
        """Return the stored execution trace for ``task_id``, or ``None`` if absent."""
        return self._traces.get(task_id)

    def _build_input_content(self, task: EvalTask) -> str:
        """
        Pick the prompt text from ``task.input_data``.

        Preference order: ``"question"``, then ``"instruction"``, otherwise the
        string form of the whole ``input_data``.
        """
        if "question" in task.input_data:
            return task.input_data["question"]
        elif "instruction" in task.input_data:
            return task.input_data["instruction"]
        else:
            return str(task.input_data)

    @staticmethod
    def _get_chat_history(result: Any) -> list:
        """
        Return the chat history list carried by ``result``, or ``[]``.

        Supports both mapping results (``result["chat_history"]``) and objects
        exposing a ``chat_history`` attribute. Anything that is not a list is
        treated as "no history".
        """
        if isinstance(result, dict):
            history = result.get("chat_history")
        else:
            history = getattr(result, "chat_history", None)
        return history if isinstance(history, list) else []

    def _extract_output(self, result: Any) -> Any:
        """
        Extract the final reply from an ``initiate_chat`` result.

        Uses the content of the last chat-history message when a history is
        available (``None`` content, as seen on tool-call messages, becomes
        ``""``). Without a history, falls back to a string ``summary``
        attribute on non-dict results, then to the result itself if it is a
        string, and finally to ``str(result)``.
        """
        history = self._get_chat_history(result)
        if history:
            last_message = history[-1]
            if isinstance(last_message, dict):
                content = last_message.get("content", "")
                return "" if content is None else content
            return str(last_message)

        if isinstance(result, str):
            return result

        if not isinstance(result, dict):
            summary = getattr(result, "summary", None)
            if isinstance(summary, str):
                return summary

        return str(result)

    def _extract_tool_calls(self, result: Any) -> list[dict]:
        """
        Collect tool calls from every message in the result's chat history.

        Each entry has the keys ``name``, ``args`` and ``success`` (always
        ``True``; the chat history read here carries no failure flag).
        Malformed messages or tool-call entries are skipped instead of raising.
        """
        tool_calls: list[dict] = []
        for msg in self._get_chat_history(result):
            if not isinstance(msg, dict):
                continue
            for tc in msg.get("tool_calls") or []:
                if not isinstance(tc, dict):
                    continue
                function = tc.get("function")
                if not isinstance(function, dict):
                    # Covers both a missing key and an explicit None.
                    function = {}
                tool_calls.append({
                    "name": function.get("name", ""),
                    "args": function.get("arguments", {}),
                    "success": True,
                })
        return tool_calls
finagent/adapter/crewai.py ADDED
@@ -0,0 +1,142 @@
1
+ """
2
+ CrewAI 适配器
3
+
4
+ 将 CrewAI 框架的 Agent 适配为 FinancialAgentInterface。
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ from collections.abc import AsyncIterator
10
+ from typing import Any
11
+
12
+ from ..interface.base import FinancialAgentInterface
13
+ from ..interface.models import (
14
+ AgentConfig,
15
+ AgentState,
16
+ EvalResponse,
17
+ EvalTask,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class CrewAIAdapter(FinancialAgentInterface):
    """
    Adapter that exposes a CrewAI crew through ``FinancialAgentInterface``.

    The wrapped object must provide ``kickoff(inputs={"query": ...})``. Its
    return value may be a ``dict``, a ``str`` or any other object (which is
    stringified for the response output).
    """

    def __init__(
        self,
        crew,
        config: AgentConfig | None = None,
    ):
        """
        Args:
            crew: The CrewAI crew (or compatible object) to wrap.
            config: Optional agent description; a generic CrewAI config is
                built when omitted.
        """
        self._crew = crew
        self._config = config or AgentConfig(
            agent_name="crewai-agent",
            agent_type="crewai",
            version="0.1.0",
            framework="crewai",
            llm_backend="unknown",
        )
        # Execution traces keyed by task id.
        # NOTE(review): nothing in this class writes to this mapping, so
        # get_trace() currently always returns None.
        self._traces: dict[str, dict] = {}

    def get_config(self) -> AgentConfig:
        """Return the configuration describing the wrapped crew."""
        return self._config

    async def ainvoke(self, task: EvalTask) -> EvalResponse:
        """
        Run a single evaluation task against the wrapped crew.

        Any exception raised while building the input, running the crew or
        parsing its result is logged and converted into an ``EvalResponse``
        with an empty ``output`` and the ``error`` field set; it is never
        propagated to the caller.
        """
        try:
            message = self._build_input_content(task)

            # NOTE(review): kickoff is a synchronous call, so it blocks the
            # event loop while the crew runs and abatch() gains no real
            # concurrency from asyncio.gather.
            result = self._crew.kickoff(inputs={"query": message})

            if isinstance(result, dict):
                output = result.get("result", str(result))
            elif isinstance(result, str):
                output = result
            else:
                output = str(result)

            return EvalResponse(
                task_id=task.task_id,
                output=output,
                tool_calls=self._extract_tool_calls(result),
            )
        except Exception as e:
            # Keep the traceback in the logs; the response only carries str(e).
            logger.exception("CrewAIAdapter: task %s failed", task.task_id)
            return EvalResponse(
                task_id=task.task_id,
                output="",
                error=f"CrewAI执行失败: {str(e)}",
            )

    async def abatch(self, tasks: list[EvalTask]) -> list[EvalResponse]:
        """Invoke every task via :meth:`ainvoke` and return responses in input order."""
        return await asyncio.gather(*[self.ainvoke(task) for task in tasks])

    async def astream(self, task: EvalTask) -> AsyncIterator[dict]:
        """
        Pseudo-streaming: run the task to completion and yield one ``message`` event.

        No incremental output is produced; the single event carries the full reply.
        """
        response = await self.ainvoke(task)
        yield {
            "event": "message",
            "data": {"content": response.output or ""},
            "task_id": task.task_id,
        }

    def get_state(self) -> AgentState:
        """Return a static ``idle`` state tagged with the framework name."""
        return AgentState(
            status="idle",
            metadata={"framework": "crewai"},
        )

    def reset(self, scope: str = "all") -> None:
        """
        Clear stored traces and reset crew/agent memory where supported.

        Calls ``reset_memory()`` on the crew if it has that method, then on
        every member found under the crew's ``agents`` or ``agent`` attribute
        that has it. ``scope`` is only logged; every call performs the same
        full reset.
        """
        logger.info("CrewAIAdapter: 重置适配器状态 (scope=%s)", scope)
        self._traces.clear()
        if hasattr(self._crew, "reset_memory"):
            self._crew.reset_memory()

        # Look at both the plural attribute (presumably what CrewAI's Crew
        # exposes -- verify against the installed version) and the singular
        # one that was checked originally.
        seen_ids: set[int] = set()
        for attr_name in ("agents", "agent"):
            members = getattr(self._crew, attr_name, None)
            if members is None:
                continue
            if not isinstance(members, (list, tuple)):
                members = [members]
            for agent in members:
                # Avoid resetting the same agent twice if both attributes
                # reference it.
                if id(agent) in seen_ids:
                    continue
                seen_ids.add(id(agent))
                if hasattr(agent, "reset_memory"):
                    agent.reset_memory()

    def serialize_state(self, state: AgentState) -> bytes:
        """Encode ``state`` as UTF-8 JSON bytes (via ``state.model_dump()``)."""
        import json
        return json.dumps(state.model_dump()).encode('utf-8')

    def deserialize_state(self, data: bytes) -> AgentState:
        """Rebuild an ``AgentState`` from bytes produced by :meth:`serialize_state`."""
        import json
        return AgentState(**json.loads(data.decode('utf-8')))

    def get_trace(self, task_id: str) -> dict | None:
        """Return the stored execution trace for ``task_id``, or ``None`` if absent."""
        return self._traces.get(task_id)

    def _build_input_content(self, task: EvalTask) -> str:
        """
        Pick the prompt text from ``task.input_data``.

        Preference order: ``"question"``, then ``"instruction"``, otherwise the
        string form of the whole ``input_data``.
        """
        if "question" in task.input_data:
            return task.input_data["question"]
        elif "instruction" in task.input_data:
            return task.input_data["instruction"]
        else:
            return str(task.input_data)

    def _extract_tool_calls(self, result: Any) -> list[dict]:
        """
        Collect tool calls from the per-task outputs of a crew result.

        ``tasks_output`` is read from a dict key or, for other result objects,
        from an attribute of the same name. Each task output contributes the
        items of its ``tools_output`` attribute, if it has a non-empty one.
        Every entry has the keys ``name``, ``args`` and ``success`` (always
        ``True``; no failure flag is read from the tool output).
        """
        if isinstance(result, dict):
            tasks_output = result.get("tasks_output")
        else:
            tasks_output = getattr(result, "tasks_output", None)
        if not isinstance(tasks_output, (list, tuple)):
            return []

        tool_calls: list[dict] = []
        for task_output in tasks_output:
            tools_output = getattr(task_output, "tools_output", None)
            if not isinstance(tools_output, (list, tuple)):
                continue
            for tool_out in tools_output:
                tool_calls.append({
                    "name": getattr(tool_out, "tool_name", ""),
                    "args": getattr(tool_out, "args", {}),
                    "success": True,
                })
        return tool_calls