botrun-flow-lang 5.12.263__py3-none-any.whl → 6.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- botrun_flow_lang/api/auth_api.py +39 -39
- botrun_flow_lang/api/auth_utils.py +183 -183
- botrun_flow_lang/api/botrun_back_api.py +65 -65
- botrun_flow_lang/api/flow_api.py +3 -3
- botrun_flow_lang/api/hatch_api.py +508 -508
- botrun_flow_lang/api/langgraph_api.py +816 -811
- botrun_flow_lang/api/langgraph_constants.py +11 -0
- botrun_flow_lang/api/line_bot_api.py +1484 -1484
- botrun_flow_lang/api/model_api.py +300 -300
- botrun_flow_lang/api/rate_limit_api.py +32 -32
- botrun_flow_lang/api/routes.py +79 -79
- botrun_flow_lang/api/search_api.py +53 -53
- botrun_flow_lang/api/storage_api.py +395 -395
- botrun_flow_lang/api/subsidy_api.py +290 -290
- botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
- botrun_flow_lang/api/user_setting_api.py +70 -70
- botrun_flow_lang/api/version_api.py +31 -31
- botrun_flow_lang/api/youtube_api.py +26 -26
- botrun_flow_lang/constants.py +13 -13
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
- botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
- botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
- botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +730 -723
- botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
- botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
- botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
- botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
- botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
- botrun_flow_lang/langgraph_agents/agents/util/img_util.py +336 -294
- botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
- botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
- botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +562 -486
- botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
- botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
- botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
- botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
- botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
- botrun_flow_lang/langgraph_agents/agents/util/usage_metadata.py +34 -0
- botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
- botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
- botrun_flow_lang/llm_agent/llm_agent.py +19 -19
- botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
- botrun_flow_lang/log/.gitignore +2 -2
- botrun_flow_lang/main.py +61 -61
- botrun_flow_lang/main_fast.py +51 -51
- botrun_flow_lang/mcp_server/__init__.py +10 -10
- botrun_flow_lang/mcp_server/default_mcp.py +854 -744
- botrun_flow_lang/models/nodes/utils.py +205 -205
- botrun_flow_lang/models/token_usage.py +34 -34
- botrun_flow_lang/requirements.txt +21 -21
- botrun_flow_lang/services/base/firestore_base.py +30 -30
- botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
- botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
- botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
- botrun_flow_lang/services/storage/storage_factory.py +12 -12
- botrun_flow_lang/services/storage/storage_store.py +65 -65
- botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
- botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
- botrun_flow_lang/static/docs/tools/index.html +926 -926
- botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
- botrun_flow_lang/tests/api_stress_test.py +357 -357
- botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
- botrun_flow_lang/tests/test_botrun_app.py +46 -46
- botrun_flow_lang/tests/test_html_util.py +31 -31
- botrun_flow_lang/tests/test_img_analyzer.py +190 -190
- botrun_flow_lang/tests/test_img_util.py +39 -39
- botrun_flow_lang/tests/test_local_files.py +114 -114
- botrun_flow_lang/tests/test_mermaid_util.py +103 -103
- botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
- botrun_flow_lang/tests/test_plotly_util.py +151 -151
- botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
- botrun_flow_lang/tools/generate_docs.py +133 -133
- botrun_flow_lang/tools/templates/tools.html +153 -153
- botrun_flow_lang/utils/__init__.py +7 -7
- botrun_flow_lang/utils/botrun_logger.py +344 -344
- botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
- botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
- botrun_flow_lang/utils/google_drive_utils.py +654 -654
- botrun_flow_lang/utils/langchain_utils.py +324 -324
- botrun_flow_lang/utils/yaml_utils.py +9 -9
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-6.2.21.dist-info}/METADATA +6 -6
- botrun_flow_lang-6.2.21.dist-info/RECORD +104 -0
- botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-6.2.21.dist-info}/WHEEL +0 -0
|
@@ -1,59 +1,59 @@
|
|
|
1
|
-
import plotly.graph_objects as go
|
|
2
|
-
import os
|
|
3
|
-
from tempfile import NamedTemporaryFile
|
|
4
|
-
from typing import Dict, Any, Optional
|
|
5
|
-
from .local_files import upload_and_get_tmp_public_url, upload_html_and_get_public_url
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
async def generate_plotly_files(
|
|
9
|
-
figure_data: Dict[str, Any],
|
|
10
|
-
botrun_flow_lang_url: str,
|
|
11
|
-
user_id: str,
|
|
12
|
-
title: Optional[str] = None,
|
|
13
|
-
) -> str:
|
|
14
|
-
"""
|
|
15
|
-
Generate plotly HTML file from figure data and upload it to GCS.
|
|
16
|
-
|
|
17
|
-
Args:
|
|
18
|
-
figure_data: Dictionary containing plotly figure data and layout
|
|
19
|
-
botrun_flow_lang_url: URL for the botrun flow lang API
|
|
20
|
-
user_id: User ID for file upload
|
|
21
|
-
title: Optional title for the plot
|
|
22
|
-
|
|
23
|
-
Returns:
|
|
24
|
-
str: URL for the HTML file or error message starting with "Error: "
|
|
25
|
-
"""
|
|
26
|
-
try:
|
|
27
|
-
# Create plotly figure from data and layout
|
|
28
|
-
data = figure_data.get("data", [])
|
|
29
|
-
layout = figure_data.get("layout", {})
|
|
30
|
-
|
|
31
|
-
# If title is provided, add it to the layout before creating the figure
|
|
32
|
-
if title:
|
|
33
|
-
layout["title"] = {"text": title}
|
|
34
|
-
|
|
35
|
-
# Create figure with data and layout
|
|
36
|
-
fig = go.Figure(data=data, layout=layout)
|
|
37
|
-
|
|
38
|
-
except Exception as e:
|
|
39
|
-
return f"Error: {str(e)}"
|
|
40
|
-
|
|
41
|
-
# Create temporary file
|
|
42
|
-
with NamedTemporaryFile(suffix=".html", delete=False) as html_temp:
|
|
43
|
-
try:
|
|
44
|
-
# Save HTML with plotly.js included
|
|
45
|
-
fig.write_html(html_temp.name, include_plotlyjs=True, full_html=True)
|
|
46
|
-
|
|
47
|
-
# Upload file to GCS
|
|
48
|
-
html_url = await upload_html_and_get_public_url(
|
|
49
|
-
html_temp.name, botrun_flow_lang_url, user_id
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
# Clean up temporary file
|
|
53
|
-
os.unlink(html_temp.name)
|
|
54
|
-
|
|
55
|
-
return html_url
|
|
56
|
-
except Exception as e:
|
|
57
|
-
# Clean up temporary file in case of error
|
|
58
|
-
os.unlink(html_temp.name)
|
|
59
|
-
return f"Error: {str(e)}"
|
|
1
|
+
import plotly.graph_objects as go
|
|
2
|
+
import os
|
|
3
|
+
from tempfile import NamedTemporaryFile
|
|
4
|
+
from typing import Dict, Any, Optional
|
|
5
|
+
from .local_files import upload_and_get_tmp_public_url, upload_html_and_get_public_url
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
async def generate_plotly_files(
    figure_data: Dict[str, Any],
    botrun_flow_lang_url: str,
    user_id: str,
    title: Optional[str] = None,
) -> str:
    """
    Render a plotly figure to a standalone HTML file and upload it.

    The figure is built from ``figure_data``, written to a temporary
    ``.html`` file (with plotly.js embedded), handed to
    ``upload_html_and_get_public_url`` and the temporary file is removed
    afterwards regardless of the outcome.

    Args:
        figure_data: Dictionary containing plotly figure ``data`` and ``layout``.
            It is not modified by this function.
        botrun_flow_lang_url: URL for the botrun flow lang API, forwarded to
            the upload helper.
        user_id: User ID for the file upload, forwarded to the upload helper.
        title: Optional title for the plot; when given it replaces any title
            present in the layout.

    Returns:
        str: URL for the HTML file, or an error message starting with "Error: ".
    """
    try:
        data = figure_data.get("data", [])
        layout = figure_data.get("layout", {})

        if title:
            # Work on a shallow copy so the caller's layout dict is not
            # silently changed by adding the title.
            if isinstance(layout, dict):
                layout = dict(layout)
            layout["title"] = {"text": title}

        fig = go.Figure(data=data, layout=layout)
    except Exception as e:
        return f"Error: {str(e)}"

    # Only reserve a unique path here and close the handle right away:
    # writing to / deleting a file by name while it is still open fails on
    # Windows.
    with NamedTemporaryFile(suffix=".html", delete=False) as html_temp:
        html_path = html_temp.name

    try:
        # Save HTML with plotly.js included so the file is self-contained.
        fig.write_html(html_path, include_plotlyjs=True, full_html=True)

        return await upload_html_and_get_public_url(
            html_path, botrun_flow_lang_url, user_id
        )
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Best-effort cleanup: a failure to delete the temporary file must
        # not mask the upload result or the original error.
        try:
            os.unlink(html_path)
        except OSError:
            pass
|
|
@@ -1,199 +1,199 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Tavily search utility for government research
|
|
3
|
-
參考 open_deep_research 的最佳實踐
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import logging
|
|
8
|
-
from typing import List, Dict, Any, AsyncGenerator
|
|
9
|
-
from dataclasses import dataclass
|
|
10
|
-
|
|
11
|
-
try:
|
|
12
|
-
from tavily import TavilyClient, AsyncTavilyClient
|
|
13
|
-
except ImportError:
|
|
14
|
-
TavilyClient = None
|
|
15
|
-
AsyncTavilyClient = None
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@dataclass
|
|
21
|
-
class SearchEvent:
|
|
22
|
-
"""搜尋事件結構,與 perplexity_search 保持一致"""
|
|
23
|
-
|
|
24
|
-
chunk: str
|
|
25
|
-
raw_json: Dict[str, Any] = None
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def respond_with_tavily_search(
|
|
29
|
-
query: str,
|
|
30
|
-
user_prompt_prefix: str = "",
|
|
31
|
-
messages: List[Dict[str, str]] = None,
|
|
32
|
-
domain_filter: List[str] = None,
|
|
33
|
-
stream: bool = False,
|
|
34
|
-
model_name: str = "tavily",
|
|
35
|
-
) -> AsyncGenerator[SearchEvent, None]:
|
|
36
|
-
"""
|
|
37
|
-
使用 Tavily 進行搜尋(參考 open_deep_research 實作)
|
|
38
|
-
|
|
39
|
-
Args:
|
|
40
|
-
query: 搜尋查詢
|
|
41
|
-
user_prompt_prefix: 用戶提示前綴(保持與 perplexity 一致的介面)
|
|
42
|
-
messages: 訊息列表(保持介面一致,但 Tavily 只使用 query)
|
|
43
|
-
domain_filter: 領域過濾
|
|
44
|
-
stream: 是否串流(Tavily 不支援,但保持介面一致)
|
|
45
|
-
model_name: 模型名稱(保持介面一致)
|
|
46
|
-
|
|
47
|
-
Yields:
|
|
48
|
-
SearchEvent: 搜尋事件
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
if AsyncTavilyClient is None:
|
|
52
|
-
logger.error("Tavily client not available. Please install tavily-python")
|
|
53
|
-
yield SearchEvent(
|
|
54
|
-
chunk="錯誤:Tavily 客戶端未安裝",
|
|
55
|
-
raw_json={"error": "tavily-python not installed"},
|
|
56
|
-
)
|
|
57
|
-
return
|
|
58
|
-
|
|
59
|
-
api_key = os.getenv("TAVILY_API_KEY")
|
|
60
|
-
if not api_key:
|
|
61
|
-
logger.error("TAVILY_API_KEY not found in environment variables")
|
|
62
|
-
yield SearchEvent(
|
|
63
|
-
chunk="錯誤:未設定 TAVILY_API_KEY",
|
|
64
|
-
raw_json={"error": "TAVILY_API_KEY not set"},
|
|
65
|
-
)
|
|
66
|
-
return
|
|
67
|
-
|
|
68
|
-
try:
|
|
69
|
-
# 使用 AsyncTavilyClient 進行搜尋(現代寫法)
|
|
70
|
-
logger.info(f"使用 Tavily 搜尋: {query}")
|
|
71
|
-
|
|
72
|
-
# 初始化 async client
|
|
73
|
-
async_client = AsyncTavilyClient(api_key=api_key)
|
|
74
|
-
|
|
75
|
-
# 準備搜尋參數
|
|
76
|
-
search_params = {
|
|
77
|
-
"query": query,
|
|
78
|
-
"search_depth": "advanced",
|
|
79
|
-
# "include_raw_content": True,
|
|
80
|
-
# "include_answer": "advanced",
|
|
81
|
-
"max_results": 5,
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
# 如果有 domain filter,加入參數
|
|
85
|
-
if domain_filter:
|
|
86
|
-
search_params["include_domains"] = domain_filter
|
|
87
|
-
|
|
88
|
-
# 執行 async 搜尋
|
|
89
|
-
response = await async_client.search(**search_params)
|
|
90
|
-
|
|
91
|
-
# 處理搜尋結果(參考 open_deep_research 格式化)
|
|
92
|
-
if response and "results" in response and len(response["results"]) > 0:
|
|
93
|
-
content_parts = []
|
|
94
|
-
sources = []
|
|
95
|
-
|
|
96
|
-
# 去重複 URL(參考 open_deep_research)
|
|
97
|
-
seen_urls = set()
|
|
98
|
-
unique_results = []
|
|
99
|
-
for result in response["results"]:
|
|
100
|
-
url = result.get("url", "")
|
|
101
|
-
if url and url not in seen_urls:
|
|
102
|
-
seen_urls.add(url)
|
|
103
|
-
unique_results.append(result)
|
|
104
|
-
|
|
105
|
-
# 格式化結果
|
|
106
|
-
for i, result in enumerate(unique_results[:5]): # 限制結果數量
|
|
107
|
-
title = result.get("title", "")
|
|
108
|
-
content = result.get("content", "")
|
|
109
|
-
raw_content = result.get("raw_content", "")
|
|
110
|
-
url = result.get("url", "")
|
|
111
|
-
|
|
112
|
-
# 優先使用 raw_content,回退到 content
|
|
113
|
-
# display_content = raw_content if content else raw_content
|
|
114
|
-
|
|
115
|
-
if title and content:
|
|
116
|
-
# 限制每個結果的長度(參考 open_deep_research)
|
|
117
|
-
# if len(display_content) > 1000:
|
|
118
|
-
# display_content = display_content[:1000] + "..."
|
|
119
|
-
|
|
120
|
-
content_parts.append(
|
|
121
|
-
f"[{i+1}] Title: {title}\nContent: {content}\nURL: {url}"
|
|
122
|
-
)
|
|
123
|
-
sources.append(url)
|
|
124
|
-
|
|
125
|
-
# 組合最終內容
|
|
126
|
-
full_content = "\n\n".join(content_parts)
|
|
127
|
-
|
|
128
|
-
# 限制總長度(參考 open_deep_research 的 30000 字元限制)
|
|
129
|
-
# if len(full_content) > 10000: # 適合政府研究的長度
|
|
130
|
-
# full_content = full_content[:10000] + "\n\n[內容已截斷以符合長度限制]"
|
|
131
|
-
|
|
132
|
-
# 構建回應 JSON(與 Perplexity 格式保持一致)
|
|
133
|
-
response_json = {
|
|
134
|
-
"message": {"content": full_content},
|
|
135
|
-
"usage": {
|
|
136
|
-
"prompt_tokens": len(query.split()),
|
|
137
|
-
"completion_tokens": len(full_content.split()),
|
|
138
|
-
"total_tokens": len(query.split()) + len(full_content.split()),
|
|
139
|
-
},
|
|
140
|
-
"model": "tavily-search",
|
|
141
|
-
"sources": sources,
|
|
142
|
-
"results_count": len(unique_results),
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
yield SearchEvent(chunk=full_content, raw_json=response_json)
|
|
146
|
-
|
|
147
|
-
else:
|
|
148
|
-
logger.warning("Tavily 搜尋回應中沒有結果")
|
|
149
|
-
yield SearchEvent(
|
|
150
|
-
chunk="未找到搜尋結果", raw_json={"error": "no results found"}
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
except Exception as e:
|
|
154
|
-
logger.error(f"Tavily 搜尋失敗: {e}")
|
|
155
|
-
yield SearchEvent(chunk=f"搜尋失敗: {str(e)}", raw_json={"error": str(e)})
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
# 為了保持一致性,提供同步版本
|
|
159
|
-
def search_with_tavily(
|
|
160
|
-
query: str, domain_filter: List[str] = None, max_results: int = 5
|
|
161
|
-
) -> Dict[str, Any]:
|
|
162
|
-
"""
|
|
163
|
-
同步版本的 Tavily 搜尋(參考 open_deep_research)
|
|
164
|
-
|
|
165
|
-
Args:
|
|
166
|
-
query: 搜尋查詢
|
|
167
|
-
domain_filter: 領域過濾
|
|
168
|
-
max_results: 最大結果數量
|
|
169
|
-
|
|
170
|
-
Returns:
|
|
171
|
-
搜尋結果字典
|
|
172
|
-
"""
|
|
173
|
-
|
|
174
|
-
if TavilyClient is None:
|
|
175
|
-
return {"error": "tavily-python not installed"}
|
|
176
|
-
|
|
177
|
-
api_key = os.getenv("TAVILY_API_KEY")
|
|
178
|
-
if not api_key:
|
|
179
|
-
return {"error": "TAVILY_API_KEY not set"}
|
|
180
|
-
|
|
181
|
-
try:
|
|
182
|
-
client = TavilyClient(api_key=api_key)
|
|
183
|
-
|
|
184
|
-
search_params = {
|
|
185
|
-
"query": query,
|
|
186
|
-
"search_depth": "advanced",
|
|
187
|
-
"include_raw_content": True, # 參考 open_deep_research
|
|
188
|
-
"include_domains": domain_filter if domain_filter else None,
|
|
189
|
-
"max_results": max_results,
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
search_params = {k: v for k, v in search_params.items() if v is not None}
|
|
193
|
-
|
|
194
|
-
response = client.search(**search_params)
|
|
195
|
-
return response
|
|
196
|
-
|
|
197
|
-
except Exception as e:
|
|
198
|
-
logger.error(f"Tavily 搜尋失敗: {e}")
|
|
199
|
-
return {"error": str(e)}
|
|
1
|
+
"""
|
|
2
|
+
Tavily search utility for government research
|
|
3
|
+
參考 open_deep_research 的最佳實踐
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import logging
|
|
8
|
+
from typing import List, Dict, Any, AsyncGenerator, Optional
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from tavily import TavilyClient, AsyncTavilyClient
|
|
13
|
+
except ImportError:
|
|
14
|
+
TavilyClient = None
|
|
15
|
+
AsyncTavilyClient = None
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class SearchEvent:
    """Search event structure, kept consistent with the one in perplexity_search."""

    # Text payload of the event.
    chunk: str
    # Raw response / diagnostic dictionary for the event; None when not supplied.
    raw_json: Optional[Dict[str, Any]] = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
async def respond_with_tavily_search(
    query: str,
    user_prompt_prefix: str = "",
    messages: Optional[List[Dict[str, str]]] = None,
    domain_filter: Optional[List[str]] = None,
    stream: bool = False,
    model_name: str = "tavily",
    max_results: int = 5,
) -> AsyncGenerator[SearchEvent, None]:
    """
    Run a Tavily search and yield the formatted outcome as a single event.

    Exactly one ``SearchEvent`` is yielded per call: either the formatted
    results, or an event whose ``raw_json`` carries an ``"error"`` key
    (client not installed, API key missing, no results, or search failure).

    Args:
        query: Search query.
        user_prompt_prefix: Unused; accepted to keep the same interface as
            the perplexity search.
        messages: Unused; accepted for interface parity (only ``query`` is
            sent to Tavily).
        domain_filter: Optional list of domains passed as ``include_domains``.
        stream: Unused; accepted for interface parity.
        model_name: Unused; accepted for interface parity.
        max_results: Maximum number of results to request and format.

    Yields:
        SearchEvent: The search event.
    """

    if AsyncTavilyClient is None:
        logger.error("Tavily client not available. Please install tavily-python")
        yield SearchEvent(
            chunk="錯誤:Tavily 客戶端未安裝",
            raw_json={"error": "tavily-python not installed"},
        )
        return

    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        logger.error("TAVILY_API_KEY not found in environment variables")
        yield SearchEvent(
            chunk="錯誤:未設定 TAVILY_API_KEY",
            raw_json={"error": "TAVILY_API_KEY not set"},
        )
        return

    try:
        logger.info(f"使用 Tavily 搜尋: {query}")

        async_client = AsyncTavilyClient(api_key=api_key)

        # Raw page content and Tavily's generated answer are not requested.
        search_params = {
            "query": query,
            "search_depth": "advanced",
            "max_results": max_results,
        }
        if domain_filter:
            search_params["include_domains"] = domain_filter

        response = await async_client.search(**search_params)

        if response and "results" in response and len(response["results"]) > 0:
            # Drop results without a URL and de-duplicate by URL, keeping
            # the first occurrence.
            seen_urls = set()
            unique_results = []
            for result in response["results"]:
                url = result.get("url", "")
                if url and url not in seen_urls:
                    seen_urls.add(url)
                    unique_results.append(result)

            content_parts = []
            sources = []
            for result in unique_results[:max_results]:
                title = result.get("title", "")
                content = result.get("content", "")
                if not (title and content):
                    continue

                url = result.get("url", "")
                sources.append(url)
                # Number by position in ``sources`` so that citation [n]
                # always refers to sources[n - 1], even when earlier
                # results were skipped for lacking a title or content.
                content_parts.append(
                    f"[{len(sources)}] Title: {title}\nContent: {content}\nURL: {url}"
                )

            full_content = "\n\n".join(content_parts)

            # Whitespace-separated word counts stand in for token counts.
            prompt_tokens = len(query.split())
            completion_tokens = len(full_content.split())

            # Response JSON shaped like the Perplexity one.
            response_json = {
                "message": {"content": full_content},
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                },
                "model": "tavily-search",
                "sources": sources,
                "results_count": len(unique_results),
            }

            yield SearchEvent(chunk=full_content, raw_json=response_json)

        else:
            logger.warning("Tavily 搜尋回應中沒有結果")
            yield SearchEvent(
                chunk="未找到搜尋結果", raw_json={"error": "no results found"}
            )

    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Tavily 搜尋失敗: {e}")
        yield SearchEvent(chunk=f"搜尋失敗: {str(e)}", raw_json={"error": str(e)})
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# 為了保持一致性,提供同步版本
|
|
159
|
+
def search_with_tavily(
    query: str, domain_filter: List[str] = None, max_results: int = 5
) -> Dict[str, Any]:
    """
    Synchronous Tavily search.

    Args:
        query: Search query.
        domain_filter: Optional list of domains passed as ``include_domains``.
        max_results: Maximum number of results to request.

    Returns:
        The response returned by ``TavilyClient.search``, or a dictionary
        with a single ``"error"`` key when the client is not installed, the
        API key is missing, or the search raises.
    """

    if TavilyClient is None:
        return {"error": "tavily-python not installed"}

    tavily_key = os.getenv("TAVILY_API_KEY")
    if not tavily_key:
        return {"error": "TAVILY_API_KEY not set"}

    try:
        tavily = TavilyClient(api_key=tavily_key)

        candidates = dict(
            query=query,
            search_depth="advanced",
            include_raw_content=True,
            include_domains=domain_filter or None,
            max_results=max_results,
        )

        # Only forward arguments that actually carry a value.
        kwargs = {}
        for name, value in candidates.items():
            if value is not None:
                kwargs[name] = value

        return tavily.search(**kwargs)

    except Exception as e:
        logger.error(f"Tavily 搜尋失敗: {e}")
        return {"error": str(e)}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Usage Metadata 模組
|
|
3
|
+
|
|
4
|
+
提供 LLM 呼叫的 token 使用量追蹤功能。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, asdict
|
|
8
|
+
from typing import Dict, Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class UsageMetadata:
    """Token usage metadata that matches the expected parsing format.

    Instances can be combined with ``+`` and with the built-in ``sum()``.
    """

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    cache_creation_input_tokens: int = 0
    cache_read_input_tokens: int = 0
    model: str = ""

    def __add__(self, other: "UsageMetadata") -> "UsageMetadata":
        """Combine two UsageMetadata objects.

        All token counters are summed field by field. ``model`` is taken
        from ``self`` unless it is empty, in which case ``other.model`` is
        used.
        """
        return UsageMetadata(
            prompt_tokens=self.prompt_tokens + other.prompt_tokens,
            completion_tokens=self.completion_tokens + other.completion_tokens,
            total_tokens=self.total_tokens + other.total_tokens,
            cache_creation_input_tokens=self.cache_creation_input_tokens
            + other.cache_creation_input_tokens,
            cache_read_input_tokens=self.cache_read_input_tokens
            + other.cache_read_input_tokens,
            model=self.model or other.model,
        )

    def __radd__(self, other: Any) -> "UsageMetadata":
        """Support ``sum(list_of_usage)``, which starts from the integer 0.

        Returns a fresh copy of ``self`` when the left operand is ``0``;
        any other left operand is rejected with ``NotImplemented``.
        """
        if isinstance(other, int) and other == 0:
            # Return a copy so the accumulated result never aliases an input.
            return UsageMetadata(**asdict(self))
        return NotImplemented

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return asdict(self)
|