erlangshen 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/equity-agent.md +26 -0
- package/.claude/agents/macro-agent.md +25 -0
- package/.claude/commands/analyze.md +40 -0
- package/.claude/commands/macro.md +29 -0
- package/.claude/settings.json +12 -0
- package/CODEX_GOAL.md +46 -0
- package/README.md +206 -0
- package/bin/cli.js +67 -0
- package/bin/erlangshen +2 -0
- package/bin/xiaoergod +2 -0
- package/frontend/index.html +700 -0
- package/knowledge/crypto_guide.md +147 -0
- package/knowledge/economic_indicators.md +125 -0
- package/knowledge/financial_glossary.md +148 -0
- package/knowledge/first_principles.md +50 -0
- package/knowledge/first_principles_deep.md +115 -0
- package/knowledge/global_markets.md +173 -0
- package/knowledge/insights.md +141 -0
- package/knowledge/market_basics.md +116 -0
- package/knowledge/memos/session_20260513_003616.json +6 -0
- package/knowledge/memos/session_20260513_003822.json +6 -0
- package/knowledge/risk_management.md +151 -0
- package/knowledge/team_context.md +42 -0
- package/knowledge/trading_strategies.md +114 -0
- package/package.json +42 -0
- package/requirements.txt +14 -0
- package/scripts/postinstall.js +188 -0
- package/scripts/preuninstall.js +22 -0
- package/src/__init__.py +4 -0
- package/src/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/agents/__init__.py +3 -0
- package/src/agents/base.py +103 -0
- package/src/agents/base_agent.py +86 -0
- package/src/agents/equity.py +136 -0
- package/src/agents/equity_agent.py +91 -0
- package/src/agents/erlang.py +165 -0
- package/src/agents/macro.py +137 -0
- package/src/agents/macro_agent.py +81 -0
- package/src/agents/multi_asset.py +147 -0
- package/src/agents/multi_asset_agent.py +87 -0
- package/src/api/__init__.py +1 -0
- package/src/api/__pycache__/__init__.cpython-313.pyc +0 -0
- package/src/api/__pycache__/server.cpython-313.pyc +0 -0
- package/src/api/cli.py +435 -0
- package/src/api/cli_enhanced.py +537 -0
- package/src/api/server.py +266 -0
- package/src/brain.py +200 -0
- package/src/cli.py +153 -0
- package/src/commands/__init__.py +3 -0
- package/src/commands/analyze.py +131 -0
- package/src/commands/macro.py +100 -0
- package/src/commands/memo.py +216 -0
- package/src/commands/portfolio.py +154 -0
- package/src/commands/report.py +228 -0
- package/src/commands/risk.py +183 -0
- package/src/commands/search.py +183 -0
- package/src/commands/stock.py +124 -0
- package/src/config.py +327 -0
- package/src/core/__init__.py +1 -0
- package/src/core/brain.py +645 -0
- package/src/core/cerebellum.py +175 -0
- package/src/core/investment_universe.py +423 -0
- package/src/core/knowledge.py +207 -0
- package/src/core/memory.py +115 -0
- package/src/hooks/__init__.py +3 -0
- package/src/hooks/session_end.py +57 -0
- package/src/hooks/session_start.py +75 -0
- package/src/knowledge/__init__.py +1 -0
- package/src/mcp/__init__.py +3 -0
- package/src/mcp/feishu.py +331 -0
- package/src/mcp/fund_tools.py +323 -0
- package/src/mcp/macro.py +452 -0
- package/src/mcp/market.py +331 -0
- package/src/mcp/registry.py +168 -0
- package/src/network/__init__.py +15 -0
- package/src/network/detector.py +125 -0
- package/src/network/proxy.py +199 -0
- package/src/network/router.py +103 -0
- package/src/prompts/__init__.py +1 -0
- package/src/prompts/analysis_framework.md +164 -0
- package/src/prompts/persona.md +65 -0
- package/src/prompts/report_template.md +144 -0
- package/src/skills/__init__.py +3 -0
- package/src/skills/framework.py +105 -0
- package/src/skills/templates.py +342 -0
- package/src/tools/__init__.py +1 -0
- package/src/tools/file_tools.py +209 -0
- package/src/tools/macro_tools.py +152 -0
- package/src/tools/market_tools.py +1172 -0
- package/src/tools/registry.py +398 -0
- package/src/tools/search_tools.py +777 -0
- package/tests/__init__.py +1 -0
- package/tests/test_erlangshen.py +140 -0
|
@@ -0,0 +1,777 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Search Tools - 搜索工具
|
|
3
|
+
提供网络搜索、新闻搜索、公司信息查询、学术搜索
|
|
4
|
+
|
|
5
|
+
支持多种搜索源:
|
|
6
|
+
- MiniMax MCP (已有联网能力)
|
|
7
|
+
- DuckDuckGo (无需 API Key)
|
|
8
|
+
- SerpAPI (可选付费API)
|
|
9
|
+
"""
|
|
10
|
+
from typing import Optional, Any, TypedDict, List
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from loguru import logger
|
|
13
|
+
import aiohttp
|
|
14
|
+
import asyncio
|
|
15
|
+
import json
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SearchResult(TypedDict):
    """A single web-search result row."""
    title: str           # result title text
    url: str             # landing-page URL
    snippet: str         # short excerpt (empty for regex-parsed hits)
    source: str          # provider label, e.g. "DuckDuckGo" or "Google"
    date: Optional[str]  # publication date when the provider supplies one
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class NewsResult(TypedDict):
    """A single news-search result row."""
    title: str    # headline text
    url: str      # article URL
    snippet: str  # short excerpt
    source: str   # provider label
    # NOTE(review): parsers may pass None or relative text ("2 hours ago")
    # here despite the str annotation — confirm against _parse_ddg_news.
    date: str
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AcademicResult(TypedDict):
    """A single academic paper / research-report hit."""
    title: str            # paper title
    url: str              # paper landing page
    authors: List[str]    # author names (some providers truncate the list)
    abstract: str         # abstract text, possibly truncated
    year: Optional[int]   # publication year when known
    venue: Optional[str]  # journal/conference name or summary string
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CompanyInfo(TypedDict):
    """Basic company profile record."""
    name: str                    # company name as queried
    ticker: str                  # stock symbol (empty when unresolved)
    exchange: str                # listing exchange (empty when unresolved)
    industry: str                # industry label (empty when unresolved)
    sector: Optional[str]        # broader sector, when known
    market_cap: float            # market capitalization; 0.0 when unknown
    pe_ratio: Optional[float]    # P/E ratio, when known
    description: str             # short description / search snippet
    website: Optional[str]       # official site URL, when known
    headquarters: Optional[str]  # HQ location, when known
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class SearchTools:
    """
    Search tool collection.

    Tool functions:
    - web_search: web search (Chinese/English)
    - news_search: news search (supports time-range filtering)
    - academic_search: academic search (papers, research reports)
    - company_search: company information lookup
    """
|
|
70
|
+
|
|
71
|
+
def __init__(self, config: Optional[dict] = None):
|
|
72
|
+
self.config = config or {}
|
|
73
|
+
self._ddg_session: Optional[aiohttp.ClientSession] = None
|
|
74
|
+
self._cache: dict = {}
|
|
75
|
+
self._cache_ttl = self.config.get("cache_ttl", 300) # 5分钟缓存
|
|
76
|
+
logger.info("SearchTools initialized with global search support")
|
|
77
|
+
|
|
78
|
+
async def execute(self, tool_name: str, **kwargs) -> Any:
|
|
79
|
+
"""执行指定工具"""
|
|
80
|
+
method = getattr(self, tool_name, None)
|
|
81
|
+
if method and callable(method):
|
|
82
|
+
return await method(**kwargs)
|
|
83
|
+
return {"error": f"Unknown tool: {tool_name}"}
|
|
84
|
+
|
|
85
|
+
# ==================== 网络搜索 ====================
|
|
86
|
+
|
|
87
|
+
async def web_search(
|
|
88
|
+
self,
|
|
89
|
+
query: str,
|
|
90
|
+
language: str = "zh",
|
|
91
|
+
count: int = 10,
|
|
92
|
+
provider: str = "duckduckgo",
|
|
93
|
+
) -> dict:
|
|
94
|
+
"""
|
|
95
|
+
网络搜索
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
query: 搜索关键词
|
|
99
|
+
language: 语言偏好 (zh/en/auto)
|
|
100
|
+
count: 返回结果数量 (1-20)
|
|
101
|
+
provider: 搜索提供商 (duckduckgo/serpapi/minimax)
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
dict 搜索结果,包含 title, url, snippet, source, date
|
|
105
|
+
"""
|
|
106
|
+
logger.info(f"Web search: {query} (provider={provider}, lang={language})")
|
|
107
|
+
|
|
108
|
+
# 检查缓存
|
|
109
|
+
cache_key = f"web:{provider}:{language}:{query}"
|
|
110
|
+
cached = self._get_cached(cache_key)
|
|
111
|
+
if cached:
|
|
112
|
+
return cached
|
|
113
|
+
|
|
114
|
+
try:
|
|
115
|
+
if provider == "duckduckgo":
|
|
116
|
+
results = await self._duckduckgo_search(query, language, count)
|
|
117
|
+
elif provider == "serpapi":
|
|
118
|
+
results = await self._serpapi_search(query, language, count)
|
|
119
|
+
elif provider == "minimax":
|
|
120
|
+
results = await self._minimax_search(query, language, count)
|
|
121
|
+
else:
|
|
122
|
+
# 默认使用 DuckDuckGo
|
|
123
|
+
results = await self._duckduckgo_search(query, language, count)
|
|
124
|
+
|
|
125
|
+
response = {
|
|
126
|
+
"query": query,
|
|
127
|
+
"language": language,
|
|
128
|
+
"provider": provider,
|
|
129
|
+
"results": results,
|
|
130
|
+
"total": len(results),
|
|
131
|
+
"timestamp": datetime.now().isoformat(),
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
self._set_cached(cache_key, response)
|
|
135
|
+
return response
|
|
136
|
+
|
|
137
|
+
except Exception as e:
|
|
138
|
+
logger.error(f"Web search failed: {e}")
|
|
139
|
+
return {
|
|
140
|
+
"query": query,
|
|
141
|
+
"results": [],
|
|
142
|
+
"error": str(e),
|
|
143
|
+
"total": 0,
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
async def _duckduckgo_search(
|
|
147
|
+
self,
|
|
148
|
+
query: str,
|
|
149
|
+
language: str,
|
|
150
|
+
count: int,
|
|
151
|
+
) -> List[SearchResult]:
|
|
152
|
+
"""使用 DuckDuckGo HTML 搜索 (无需 API Key)"""
|
|
153
|
+
import urllib.parse
|
|
154
|
+
|
|
155
|
+
# DuckDuckGo HTML search
|
|
156
|
+
params = {
|
|
157
|
+
"q": query,
|
|
158
|
+
"kl": "wt-wt" if language == "en" else "cn-zh",
|
|
159
|
+
"ia": "news" if "news" in query.lower() else "web",
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
url = f"https://html.duckduckgo.com/html/?" + urllib.parse.urlencode(params)
|
|
163
|
+
|
|
164
|
+
headers = {
|
|
165
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
|
166
|
+
"Accept": "text/html",
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
try:
|
|
170
|
+
async with aiohttp.ClientSession() as session:
|
|
171
|
+
async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=10)) as resp:
|
|
172
|
+
html = await resp.text()
|
|
173
|
+
|
|
174
|
+
results = self._parse_ddg_html(html, count)
|
|
175
|
+
return results
|
|
176
|
+
except Exception as e:
|
|
177
|
+
logger.warning(f"DuckDuckGo search failed: {e}, trying alternative...")
|
|
178
|
+
return await self._duckduckgolite_search(query, language, count)
|
|
179
|
+
|
|
180
|
+
async def _duckduckgolite_search(
|
|
181
|
+
self,
|
|
182
|
+
query: str,
|
|
183
|
+
language: str,
|
|
184
|
+
count: int,
|
|
185
|
+
) -> List[SearchResult]:
|
|
186
|
+
"""DuckDuckGo Lite 搜索 (备选方案)"""
|
|
187
|
+
import urllib.parse
|
|
188
|
+
|
|
189
|
+
params = {
|
|
190
|
+
"q": query,
|
|
191
|
+
"format": "json",
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
url = f"https://lite.duckduckgo.com/lite/?" + urllib.parse.urlencode(params)
|
|
195
|
+
|
|
196
|
+
headers = {
|
|
197
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
|
198
|
+
"Accept": "application/json",
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
async with aiohttp.ClientSession() as session:
|
|
203
|
+
async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=10)) as resp:
|
|
204
|
+
text = await resp.text()
|
|
205
|
+
|
|
206
|
+
results = self._parse_ddg_lite(text, count)
|
|
207
|
+
return results
|
|
208
|
+
except Exception as e:
|
|
209
|
+
logger.error(f"DuckDuckGo Lite failed: {e}")
|
|
210
|
+
return []
|
|
211
|
+
|
|
212
|
+
def _parse_ddg_html(self, html: str, count: int) -> List[SearchResult]:
|
|
213
|
+
"""解析 DuckDuckGo HTML 结果"""
|
|
214
|
+
results = []
|
|
215
|
+
try:
|
|
216
|
+
from bs4 import BeautifulSoup
|
|
217
|
+
soup = BeautifulSoup(html, "html.parser")
|
|
218
|
+
|
|
219
|
+
for result in soup.select(".result")[:count]:
|
|
220
|
+
title_elem = result.select_one(".result__title a")
|
|
221
|
+
snippet_elem = result.select_one(".result__snippet")
|
|
222
|
+
|
|
223
|
+
if title_elem:
|
|
224
|
+
title = title_elem.get_text(strip=True)
|
|
225
|
+
url = title_elem.get("href", "")
|
|
226
|
+
snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
|
|
227
|
+
|
|
228
|
+
results.append(SearchResult(
|
|
229
|
+
title=title,
|
|
230
|
+
url=url,
|
|
231
|
+
snippet=snippet,
|
|
232
|
+
source="DuckDuckGo",
|
|
233
|
+
date=None,
|
|
234
|
+
))
|
|
235
|
+
except ImportError:
|
|
236
|
+
logger.warning("BeautifulSoup not installed, using regex parsing")
|
|
237
|
+
results = self._parse_ddg_regex(html, count)
|
|
238
|
+
except Exception as e:
|
|
239
|
+
logger.error(f"Failed to parse DDG HTML: {e}")
|
|
240
|
+
|
|
241
|
+
return results
|
|
242
|
+
|
|
243
|
+
def _parse_ddg_regex(self, text: str, count: int) -> List[SearchResult]:
|
|
244
|
+
"""使用正则表达式解析 DuckDuckGo 结果 (备选)"""
|
|
245
|
+
import re
|
|
246
|
+
results = []
|
|
247
|
+
|
|
248
|
+
# 简单正则匹配
|
|
249
|
+
pattern = r'<a class="result__a" href="([^"]+)">([^<]+)</a>'
|
|
250
|
+
matches = re.findall(pattern, text)
|
|
251
|
+
|
|
252
|
+
for url, title in matches[:count]:
|
|
253
|
+
results.append(SearchResult(
|
|
254
|
+
title=title.strip(),
|
|
255
|
+
url=url,
|
|
256
|
+
snippet="",
|
|
257
|
+
source="DuckDuckGo",
|
|
258
|
+
date=None,
|
|
259
|
+
))
|
|
260
|
+
|
|
261
|
+
return results
|
|
262
|
+
|
|
263
|
+
def _parse_ddg_lite(self, text: str, count: int) -> List[SearchResult]:
|
|
264
|
+
"""解析 DuckDuckGo Lite JSON 结果"""
|
|
265
|
+
results = []
|
|
266
|
+
try:
|
|
267
|
+
import re
|
|
268
|
+
|
|
269
|
+
# 匹配 <a href="URL">TITLE</a> 模式
|
|
270
|
+
pattern = r'<a href="(https?://[^"]+)"[^>]*>([^<]+)</a>'
|
|
271
|
+
matches = re.findall(pattern, text)
|
|
272
|
+
|
|
273
|
+
seen = set()
|
|
274
|
+
for url, title in matches:
|
|
275
|
+
if url not in seen and len(results) < count:
|
|
276
|
+
if not any(x in url.lower() for x in ['duckduckgo', 'duck.com']):
|
|
277
|
+
seen.add(url)
|
|
278
|
+
results.append(SearchResult(
|
|
279
|
+
title=title.strip(),
|
|
280
|
+
url=url,
|
|
281
|
+
snippet="",
|
|
282
|
+
source="DuckDuckGo",
|
|
283
|
+
date=None,
|
|
284
|
+
))
|
|
285
|
+
except Exception as e:
|
|
286
|
+
logger.error(f"Failed to parse DDG Lite: {e}")
|
|
287
|
+
|
|
288
|
+
return results
|
|
289
|
+
|
|
290
|
+
async def _serpapi_search(
|
|
291
|
+
self,
|
|
292
|
+
query: str,
|
|
293
|
+
language: str,
|
|
294
|
+
count: int,
|
|
295
|
+
) -> List[SearchResult]:
|
|
296
|
+
"""使用 SerpAPI 搜索 (需要 API Key)"""
|
|
297
|
+
api_key = self.config.get("serpapi_key")
|
|
298
|
+
if not api_key:
|
|
299
|
+
logger.warning("SerpAPI key not configured")
|
|
300
|
+
return []
|
|
301
|
+
|
|
302
|
+
params = {
|
|
303
|
+
"q": query,
|
|
304
|
+
"api_key": api_key,
|
|
305
|
+
"engine": "google",
|
|
306
|
+
"num": count,
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
if language == "zh":
|
|
310
|
+
params["gl"] = "cn"
|
|
311
|
+
params["hl"] = "zh-cn"
|
|
312
|
+
elif language == "en":
|
|
313
|
+
params["gl"] = "us"
|
|
314
|
+
params["hl"] = "en"
|
|
315
|
+
|
|
316
|
+
try:
|
|
317
|
+
async with aiohttp.ClientSession() as session:
|
|
318
|
+
async with session.get(
|
|
319
|
+
"https://serpapi.com/search",
|
|
320
|
+
params=params,
|
|
321
|
+
timeout=aiohttp.ClientTimeout(total=15),
|
|
322
|
+
) as resp:
|
|
323
|
+
data = await resp.json()
|
|
324
|
+
|
|
325
|
+
results = []
|
|
326
|
+
for item in data.get("organic_results", [])[:count]:
|
|
327
|
+
results.append(SearchResult(
|
|
328
|
+
title=item.get("title", ""),
|
|
329
|
+
url=item.get("link", ""),
|
|
330
|
+
snippet=item.get("snippet", ""),
|
|
331
|
+
source="Google",
|
|
332
|
+
date=item.get("date", None),
|
|
333
|
+
))
|
|
334
|
+
return results
|
|
335
|
+
|
|
336
|
+
except Exception as e:
|
|
337
|
+
logger.error(f"SerpAPI search failed: {e}")
|
|
338
|
+
return []
|
|
339
|
+
|
|
340
|
+
async def _minimax_search(
|
|
341
|
+
self,
|
|
342
|
+
query: str,
|
|
343
|
+
language: str,
|
|
344
|
+
count: int,
|
|
345
|
+
) -> List[SearchResult]:
|
|
346
|
+
"""使用 MiniMax MCP 搜索 (如果可用)"""
|
|
347
|
+
# 尝试使用 MiniMax 的联网能力
|
|
348
|
+
# 这需要 mcporter minimax 配置
|
|
349
|
+
try:
|
|
350
|
+
# 预留接口,实际通过 MCP 调用
|
|
351
|
+
logger.info("MiniMax search - via MCP interface")
|
|
352
|
+
return []
|
|
353
|
+
except Exception as e:
|
|
354
|
+
logger.warning(f"MiniMax search not available: {e}")
|
|
355
|
+
return []
|
|
356
|
+
|
|
357
|
+
# ==================== 新闻搜索 ====================
|
|
358
|
+
|
|
359
|
+
async def news_search(
|
|
360
|
+
self,
|
|
361
|
+
query: str,
|
|
362
|
+
days: int = 7,
|
|
363
|
+
language: str = "zh",
|
|
364
|
+
count: int = 10,
|
|
365
|
+
) -> dict:
|
|
366
|
+
"""
|
|
367
|
+
新闻搜索
|
|
368
|
+
|
|
369
|
+
Args:
|
|
370
|
+
query: 搜索关键词
|
|
371
|
+
days: 最近天数
|
|
372
|
+
language: 语言 (zh/en)
|
|
373
|
+
count: 返回数量
|
|
374
|
+
|
|
375
|
+
Returns:
|
|
376
|
+
dict 新闻结果列表
|
|
377
|
+
"""
|
|
378
|
+
logger.info(f"News search: {query} (days={days})")
|
|
379
|
+
|
|
380
|
+
cache_key = f"news:{query}:{days}:{language}"
|
|
381
|
+
cached = self._get_cached(cache_key)
|
|
382
|
+
if cached:
|
|
383
|
+
return cached
|
|
384
|
+
|
|
385
|
+
try:
|
|
386
|
+
# 使用 DuckDuckGo 新闻
|
|
387
|
+
news = await self._duckduckgo_news(query, language, count)
|
|
388
|
+
|
|
389
|
+
# 过滤日期
|
|
390
|
+
from datetime import timedelta
|
|
391
|
+
cutoff = datetime.now() - timedelta(days=days)
|
|
392
|
+
filtered = []
|
|
393
|
+
for item in news:
|
|
394
|
+
if item.get("date"):
|
|
395
|
+
try:
|
|
396
|
+
item_date = datetime.fromisoformat(item["date"].replace("Z", "+00:00"))
|
|
397
|
+
if item_date > cutoff:
|
|
398
|
+
filtered.append(item)
|
|
399
|
+
except:
|
|
400
|
+
filtered.append(item)
|
|
401
|
+
else:
|
|
402
|
+
filtered.append(item)
|
|
403
|
+
|
|
404
|
+
response = {
|
|
405
|
+
"query": query,
|
|
406
|
+
"days": days,
|
|
407
|
+
"news": filtered,
|
|
408
|
+
"total": len(filtered),
|
|
409
|
+
"timestamp": datetime.now().isoformat(),
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
self._set_cached(cache_key, response)
|
|
413
|
+
return response
|
|
414
|
+
|
|
415
|
+
except Exception as e:
|
|
416
|
+
logger.error(f"News search failed: {e}")
|
|
417
|
+
return {
|
|
418
|
+
"query": query,
|
|
419
|
+
"news": [],
|
|
420
|
+
"error": str(e),
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
async def _duckduckgo_news(
|
|
424
|
+
self,
|
|
425
|
+
query: str,
|
|
426
|
+
language: str,
|
|
427
|
+
count: int,
|
|
428
|
+
) -> List[NewsResult]:
|
|
429
|
+
"""DuckDuckGo 新闻搜索"""
|
|
430
|
+
import urllib.parse
|
|
431
|
+
|
|
432
|
+
params = {
|
|
433
|
+
"q": query,
|
|
434
|
+
"ia": "news",
|
|
435
|
+
"kl": "wt-wt" if language == "en" else "cn-zh",
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
url = f"https://html.duckduckgo.com/html/?" + urllib.parse.urlencode(params)
|
|
439
|
+
|
|
440
|
+
headers = {
|
|
441
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
try:
|
|
445
|
+
async with aiohttp.ClientSession() as session:
|
|
446
|
+
async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=10)) as resp:
|
|
447
|
+
html = await resp.text()
|
|
448
|
+
|
|
449
|
+
return self._parse_ddg_news(html, count)
|
|
450
|
+
except Exception as e:
|
|
451
|
+
logger.error(f"DuckDuckGo news failed: {e}")
|
|
452
|
+
return []
|
|
453
|
+
|
|
454
|
+
def _parse_ddg_news(self, html: str, count: int) -> List[NewsResult]:
|
|
455
|
+
"""解析 DuckDuckGo 新闻结果"""
|
|
456
|
+
results = []
|
|
457
|
+
try:
|
|
458
|
+
from bs4 import BeautifulSoup
|
|
459
|
+
soup = BeautifulSoup(html, "html.parser")
|
|
460
|
+
|
|
461
|
+
for item in soup.select(".result")[:count]:
|
|
462
|
+
title_elem = item.select_one(".result__title a")
|
|
463
|
+
snippet_elem = item.select_one(".result__snippet")
|
|
464
|
+
|
|
465
|
+
# 尝试获取日期
|
|
466
|
+
date_elem = item.select_one(".result__timestamp")
|
|
467
|
+
date_text = date_elem.get_text(strip=True) if date_elem else None
|
|
468
|
+
|
|
469
|
+
if title_elem:
|
|
470
|
+
results.append(NewsResult(
|
|
471
|
+
title=title_elem.get_text(strip=True),
|
|
472
|
+
url=title_elem.get("href", ""),
|
|
473
|
+
snippet=snippet_elem.get_text(strip=True) if snippet_elem else "",
|
|
474
|
+
source="DuckDuckGo",
|
|
475
|
+
date=date_text,
|
|
476
|
+
))
|
|
477
|
+
except Exception as e:
|
|
478
|
+
logger.error(f"Failed to parse news: {e}")
|
|
479
|
+
|
|
480
|
+
return results
|
|
481
|
+
|
|
482
|
+
# ==================== 学术搜索 ====================
|
|
483
|
+
|
|
484
|
+
async def academic_search(
|
|
485
|
+
self,
|
|
486
|
+
query: str,
|
|
487
|
+
count: int = 10,
|
|
488
|
+
domain: Optional[str] = None,
|
|
489
|
+
) -> dict:
|
|
490
|
+
"""
|
|
491
|
+
学术搜索
|
|
492
|
+
|
|
493
|
+
Args:
|
|
494
|
+
query: 搜索关键词
|
|
495
|
+
count: 返回数量
|
|
496
|
+
domain: 领域筛选 (cs/econ/fin)
|
|
497
|
+
|
|
498
|
+
Returns:
|
|
499
|
+
dict 学术论文/研报列表
|
|
500
|
+
"""
|
|
501
|
+
logger.info(f"Academic search: {query} (domain={domain})")
|
|
502
|
+
|
|
503
|
+
cache_key = f"academic:{query}:{domain}"
|
|
504
|
+
cached = self._get_cached(cache_key)
|
|
505
|
+
if cached:
|
|
506
|
+
return cached
|
|
507
|
+
|
|
508
|
+
results = []
|
|
509
|
+
|
|
510
|
+
# 尝试 Google Scholar (通过 SerpAPI)
|
|
511
|
+
if self.config.get("serpapi_key"):
|
|
512
|
+
scholar_results = await self._serpapi_scholar(query, count)
|
|
513
|
+
results.extend(scholar_results)
|
|
514
|
+
|
|
515
|
+
# 尝试 Semantic Scholar (免费)
|
|
516
|
+
ss_results = await self._semantic_scholar(query, count)
|
|
517
|
+
results.extend(ss_results)
|
|
518
|
+
|
|
519
|
+
# 去重
|
|
520
|
+
seen_urls = set()
|
|
521
|
+
unique_results = []
|
|
522
|
+
for r in results:
|
|
523
|
+
if r["url"] not in seen_urls:
|
|
524
|
+
seen_urls.add(r["url"])
|
|
525
|
+
unique_results.append(r)
|
|
526
|
+
|
|
527
|
+
response = {
|
|
528
|
+
"query": query,
|
|
529
|
+
"domain": domain,
|
|
530
|
+
"papers": unique_results[:count],
|
|
531
|
+
"total": len(unique_results),
|
|
532
|
+
"timestamp": datetime.now().isoformat(),
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
self._set_cached(cache_key, response, ttl=3600) # 学术结果缓存1小时
|
|
536
|
+
return response
|
|
537
|
+
|
|
538
|
+
async def _semantic_scholar(
|
|
539
|
+
self,
|
|
540
|
+
query: str,
|
|
541
|
+
count: int,
|
|
542
|
+
) -> List[AcademicResult]:
|
|
543
|
+
"""Semantic Scholar 免费学术搜索"""
|
|
544
|
+
import urllib.parse
|
|
545
|
+
|
|
546
|
+
params = {
|
|
547
|
+
"query": query,
|
|
548
|
+
"limit": count,
|
|
549
|
+
"fields": "title,authors,abstract,year,venue,openAccessPdf",
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
url = f"https://api.semanticscholar.org/graph/v1/paper/search?" + urllib.parse.urlencode(params)
|
|
553
|
+
|
|
554
|
+
headers = {
|
|
555
|
+
"Accept": "application/json",
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
try:
|
|
559
|
+
async with aiohttp.ClientSession() as session:
|
|
560
|
+
async with session.get(
|
|
561
|
+
url,
|
|
562
|
+
headers=headers,
|
|
563
|
+
timeout=aiohttp.ClientTimeout(total=10),
|
|
564
|
+
) as resp:
|
|
565
|
+
data = await resp.json()
|
|
566
|
+
|
|
567
|
+
results = []
|
|
568
|
+
for paper in data.get("data", []):
|
|
569
|
+
authors = [a.get("name", "") for a in paper.get("authors", [])]
|
|
570
|
+
|
|
571
|
+
results.append(AcademicResult(
|
|
572
|
+
title=paper.get("title", ""),
|
|
573
|
+
url=f"https://www.semanticscholar.org/paper/{paper.get('paperId', '')}",
|
|
574
|
+
authors=authors[:5], # 最多5个作者
|
|
575
|
+
abstract=paper.get("abstract", "")[:500],
|
|
576
|
+
year=paper.get("year"),
|
|
577
|
+
venue=paper.get("venue"),
|
|
578
|
+
))
|
|
579
|
+
return results
|
|
580
|
+
|
|
581
|
+
except Exception as e:
|
|
582
|
+
logger.error(f"Semantic Scholar failed: {e}")
|
|
583
|
+
return []
|
|
584
|
+
|
|
585
|
+
async def _serpapi_scholar(
|
|
586
|
+
self,
|
|
587
|
+
query: str,
|
|
588
|
+
count: int,
|
|
589
|
+
) -> List[AcademicResult]:
|
|
590
|
+
"""SerpAPI Google Scholar 搜索"""
|
|
591
|
+
api_key = self.config.get("serpapi_key")
|
|
592
|
+
if not api_key:
|
|
593
|
+
return []
|
|
594
|
+
|
|
595
|
+
params = {
|
|
596
|
+
"q": query,
|
|
597
|
+
"api_key": api_key,
|
|
598
|
+
"engine": "google_scholar",
|
|
599
|
+
"num": count,
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
try:
|
|
603
|
+
async with aiohttp.ClientSession() as session:
|
|
604
|
+
async with session.get(
|
|
605
|
+
"https://serpapi.com/search",
|
|
606
|
+
params=params,
|
|
607
|
+
timeout=aiohttp.ClientTimeout(total=15),
|
|
608
|
+
) as resp:
|
|
609
|
+
data = await resp.json()
|
|
610
|
+
|
|
611
|
+
results = []
|
|
612
|
+
for item in data.get("organic_results", []):
|
|
613
|
+
results.append(AcademicResult(
|
|
614
|
+
title=item.get("title", ""),
|
|
615
|
+
url=item.get("link", ""),
|
|
616
|
+
authors=[item.get("publication_info", {}).get("authors", [])],
|
|
617
|
+
abstract=item.get("snippet", ""),
|
|
618
|
+
year=None,
|
|
619
|
+
venue=item.get("publication_info", {}).get("summary", None),
|
|
620
|
+
))
|
|
621
|
+
return results
|
|
622
|
+
|
|
623
|
+
except Exception as e:
|
|
624
|
+
logger.error(f"SerpAPI Scholar failed: {e}")
|
|
625
|
+
return []
|
|
626
|
+
|
|
627
|
+
# ==================== 公司信息 ====================
|
|
628
|
+
|
|
629
|
+
async def company_search(self, name: str) -> dict:
|
|
630
|
+
"""
|
|
631
|
+
公司信息搜索
|
|
632
|
+
|
|
633
|
+
Args:
|
|
634
|
+
name: 公司名称或股票代码
|
|
635
|
+
|
|
636
|
+
Returns:
|
|
637
|
+
dict 公司基本信息、财务数据、新闻
|
|
638
|
+
"""
|
|
639
|
+
logger.info(f"Company search: {name}")
|
|
640
|
+
|
|
641
|
+
cache_key = f"company:{name}"
|
|
642
|
+
cached = self._get_cached(cache_key)
|
|
643
|
+
if cached:
|
|
644
|
+
return cached
|
|
645
|
+
|
|
646
|
+
# 尝试多个数据源
|
|
647
|
+
info = await self._search_company_basic(name)
|
|
648
|
+
|
|
649
|
+
response = {
|
|
650
|
+
"query": name,
|
|
651
|
+
"info": info,
|
|
652
|
+
"timestamp": datetime.now().isoformat(),
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
self._set_cached(cache_key, response, ttl=3600) # 1小时缓存
|
|
656
|
+
return response
|
|
657
|
+
|
|
658
|
+
async def _search_company_basic(self, name: str) -> CompanyInfo:
|
|
659
|
+
"""搜索公司基本信息"""
|
|
660
|
+
# 使用 DuckDuckGo 搜索公司信息
|
|
661
|
+
results = await self._duckduckgo_search(f"{name} 公司 简介", "zh", 5)
|
|
662
|
+
|
|
663
|
+
if results:
|
|
664
|
+
first_result = results[0]
|
|
665
|
+
return CompanyInfo(
|
|
666
|
+
name=name,
|
|
667
|
+
ticker="",
|
|
668
|
+
exchange="",
|
|
669
|
+
industry="",
|
|
670
|
+
sector=None,
|
|
671
|
+
market_cap=0.0,
|
|
672
|
+
pe_ratio=None,
|
|
673
|
+
description=first_result.get("snippet", ""),
|
|
674
|
+
website=None,
|
|
675
|
+
headquarters=None,
|
|
676
|
+
)
|
|
677
|
+
|
|
678
|
+
return CompanyInfo(
|
|
679
|
+
name=name,
|
|
680
|
+
ticker="",
|
|
681
|
+
exchange="",
|
|
682
|
+
industry="",
|
|
683
|
+
sector=None,
|
|
684
|
+
market_cap=0.0,
|
|
685
|
+
pe_ratio=None,
|
|
686
|
+
description="",
|
|
687
|
+
website=None,
|
|
688
|
+
headquarters=None,
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
# ==================== 缓存管理 ====================
|
|
692
|
+
|
|
693
|
+
def _get_cached(self, key: str) -> Optional[dict]:
|
|
694
|
+
"""获取缓存"""
|
|
695
|
+
import time
|
|
696
|
+
if key in self._cache:
|
|
697
|
+
entry = self._cache[key]
|
|
698
|
+
if time.time() - entry["time"] < entry["ttl"]:
|
|
699
|
+
return entry["data"]
|
|
700
|
+
else:
|
|
701
|
+
del self._cache[key]
|
|
702
|
+
return None
|
|
703
|
+
|
|
704
|
+
def _set_cached(self, key: str, data: dict, ttl: Optional[int] = None) -> None:
|
|
705
|
+
"""设置缓存"""
|
|
706
|
+
import time
|
|
707
|
+
self._cache[key] = {
|
|
708
|
+
"data": data,
|
|
709
|
+
"time": time.time(),
|
|
710
|
+
"ttl": ttl or self._cache_ttl,
|
|
711
|
+
}
|
|
712
|
+
# 限制缓存大小
|
|
713
|
+
if len(self._cache) > 1000:
|
|
714
|
+
self._cleanup_cache()
|
|
715
|
+
|
|
716
|
+
def _cleanup_cache(self) -> None:
|
|
717
|
+
"""清理过期缓存"""
|
|
718
|
+
import time
|
|
719
|
+
now = time.time()
|
|
720
|
+
expired = [k for k, v in self._cache.items() if now - v["time"] >= v["ttl"]]
|
|
721
|
+
for k in expired:
|
|
722
|
+
del self._cache[k]
|
|
723
|
+
|
|
724
|
+
# ==================== 财经新闻快捷方法 ====================
|
|
725
|
+
|
|
726
|
+
async def get_financial_news(
|
|
727
|
+
self,
|
|
728
|
+
tickers: Optional[list[str]] = None,
|
|
729
|
+
days: int = 7,
|
|
730
|
+
) -> dict:
|
|
731
|
+
"""
|
|
732
|
+
获取财经新闻
|
|
733
|
+
|
|
734
|
+
Args:
|
|
735
|
+
tickers: 关注的股票代码列表
|
|
736
|
+
days: 最近天数
|
|
737
|
+
|
|
738
|
+
Returns:
|
|
739
|
+
dict 新闻列表
|
|
740
|
+
"""
|
|
741
|
+
query = " ".join(tickers) if tickers else "股票 财经"
|
|
742
|
+
return await self.news_search(query, days=days, language="zh")
|
|
743
|
+
|
|
744
|
+
async def get_macro_news(
|
|
745
|
+
self,
|
|
746
|
+
keywords: Optional[list[str]] = None,
|
|
747
|
+
days: int = 7,
|
|
748
|
+
) -> dict:
|
|
749
|
+
"""
|
|
750
|
+
获取宏观新闻
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
keywords: 关键词列表
|
|
754
|
+
days: 最近天数
|
|
755
|
+
|
|
756
|
+
Returns:
|
|
757
|
+
dict 宏观新闻列表
|
|
758
|
+
"""
|
|
759
|
+
query = " ".join(keywords) if keywords else "宏观经济 货币政策"
|
|
760
|
+
return await self.news_search(query, days=days, language="zh")
|
|
761
|
+
|
|
762
|
+
async def get_industry_news(
|
|
763
|
+
self,
|
|
764
|
+
industry: str,
|
|
765
|
+
days: int = 7,
|
|
766
|
+
) -> dict:
|
|
767
|
+
"""
|
|
768
|
+
获取行业新闻
|
|
769
|
+
|
|
770
|
+
Args:
|
|
771
|
+
industry: 行业名称
|
|
772
|
+
days: 最近天数
|
|
773
|
+
|
|
774
|
+
Returns:
|
|
775
|
+
dict 行业新闻
|
|
776
|
+
"""
|
|
777
|
+
return await self.news_search(f"{industry}行业 动态", days=days, language="zh")
|