botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- botrun_flow_lang/api/auth_api.py +39 -39
- botrun_flow_lang/api/auth_utils.py +183 -183
- botrun_flow_lang/api/botrun_back_api.py +65 -65
- botrun_flow_lang/api/flow_api.py +3 -3
- botrun_flow_lang/api/hatch_api.py +508 -508
- botrun_flow_lang/api/langgraph_api.py +811 -811
- botrun_flow_lang/api/line_bot_api.py +1484 -1484
- botrun_flow_lang/api/model_api.py +300 -300
- botrun_flow_lang/api/rate_limit_api.py +32 -32
- botrun_flow_lang/api/routes.py +79 -79
- botrun_flow_lang/api/search_api.py +53 -53
- botrun_flow_lang/api/storage_api.py +395 -395
- botrun_flow_lang/api/subsidy_api.py +290 -290
- botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
- botrun_flow_lang/api/user_setting_api.py +70 -70
- botrun_flow_lang/api/version_api.py +31 -31
- botrun_flow_lang/api/youtube_api.py +26 -26
- botrun_flow_lang/constants.py +13 -13
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
- botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
- botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
- botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +723 -723
- botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
- botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
- botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
- botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
- botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
- botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
- botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
- botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
- botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +486 -486
- botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
- botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
- botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
- botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
- botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
- botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
- botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
- botrun_flow_lang/llm_agent/llm_agent.py +19 -19
- botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
- botrun_flow_lang/log/.gitignore +2 -2
- botrun_flow_lang/main.py +61 -61
- botrun_flow_lang/main_fast.py +51 -51
- botrun_flow_lang/mcp_server/__init__.py +10 -10
- botrun_flow_lang/mcp_server/default_mcp.py +744 -744
- botrun_flow_lang/models/nodes/utils.py +205 -205
- botrun_flow_lang/models/token_usage.py +34 -34
- botrun_flow_lang/requirements.txt +21 -21
- botrun_flow_lang/services/base/firestore_base.py +30 -30
- botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
- botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
- botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
- botrun_flow_lang/services/storage/storage_factory.py +12 -12
- botrun_flow_lang/services/storage/storage_store.py +65 -65
- botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
- botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
- botrun_flow_lang/static/docs/tools/index.html +926 -926
- botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
- botrun_flow_lang/tests/api_stress_test.py +357 -357
- botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
- botrun_flow_lang/tests/test_botrun_app.py +46 -46
- botrun_flow_lang/tests/test_html_util.py +31 -31
- botrun_flow_lang/tests/test_img_analyzer.py +190 -190
- botrun_flow_lang/tests/test_img_util.py +39 -39
- botrun_flow_lang/tests/test_local_files.py +114 -114
- botrun_flow_lang/tests/test_mermaid_util.py +103 -103
- botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
- botrun_flow_lang/tests/test_plotly_util.py +151 -151
- botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
- botrun_flow_lang/tools/generate_docs.py +133 -133
- botrun_flow_lang/tools/templates/tools.html +153 -153
- botrun_flow_lang/utils/__init__.py +7 -7
- botrun_flow_lang/utils/botrun_logger.py +344 -344
- botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
- botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
- botrun_flow_lang/utils/google_drive_utils.py +654 -654
- botrun_flow_lang/utils/langchain_utils.py +324 -324
- botrun_flow_lang/utils/yaml_utils.py +9 -9
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/METADATA +1 -1
- botrun_flow_lang-5.12.264.dist-info/RECORD +102 -0
- botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/WHEEL +0 -0
|
@@ -1,464 +1,464 @@
|
|
|
1
|
-
from copy import deepcopy
|
|
2
|
-
from typing import AsyncGenerator
|
|
3
|
-
from pydantic import BaseModel
|
|
4
|
-
import os
|
|
5
|
-
import json
|
|
6
|
-
import aiohttp
|
|
7
|
-
from dotenv import load_dotenv
|
|
8
|
-
import re
|
|
9
|
-
import logging
|
|
10
|
-
|
|
11
|
-
load_dotenv()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class PerplexitySearchEvent(BaseModel):
|
|
15
|
-
chunk: str
|
|
16
|
-
raw_json: dict | None = None
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
|
20
|
-
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def should_include_citation(citation: str, domain_filter: list[str]) -> bool:
|
|
24
|
-
# 如果沒有任何過濾規則,接受所有網站
|
|
25
|
-
if not domain_filter:
|
|
26
|
-
return True
|
|
27
|
-
|
|
28
|
-
# 分離排除規則和包含規則
|
|
29
|
-
exclude_rules = [
|
|
30
|
-
rule[1:].replace("*.", "") for rule in domain_filter if rule.startswith("-")
|
|
31
|
-
]
|
|
32
|
-
include_rules = [
|
|
33
|
-
rule.replace("*.", "") for rule in domain_filter if not rule.startswith("-")
|
|
34
|
-
]
|
|
35
|
-
|
|
36
|
-
# 檢查是否符合任何排除規則
|
|
37
|
-
for pattern in exclude_rules:
|
|
38
|
-
if pattern in citation:
|
|
39
|
-
return False
|
|
40
|
-
|
|
41
|
-
# 如果沒有包含規則,且通過了排除規則檢查,就接受該網站
|
|
42
|
-
if not include_rules:
|
|
43
|
-
return True
|
|
44
|
-
|
|
45
|
-
# 如果有包含規則,必須符合至少一個
|
|
46
|
-
for pattern in include_rules:
|
|
47
|
-
if pattern in citation:
|
|
48
|
-
return True
|
|
49
|
-
|
|
50
|
-
return False
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def is_valid_domain(domain: str) -> bool:
|
|
54
|
-
if not domain or "*." in domain:
|
|
55
|
-
return False
|
|
56
|
-
|
|
57
|
-
# 只允許包含 ://、.、% 和英數字的網址
|
|
58
|
-
# ^ 表示開頭,$ 表示結尾
|
|
59
|
-
# [a-zA-Z0-9] 表示英數字
|
|
60
|
-
# [\\.\\:\\/\\%] 表示允許的特殊字符
|
|
61
|
-
pattern = r"^[a-zA-Z0-9\\.\\:\\/\\%]+$"
|
|
62
|
-
|
|
63
|
-
return bool(re.match(pattern, domain))
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
async def respond_with_perplexity_search_openrouter(
|
|
67
|
-
input_content,
|
|
68
|
-
user_prompt_prefix,
|
|
69
|
-
messages_for_llm,
|
|
70
|
-
domain_filter: list[str],
|
|
71
|
-
stream: bool = False,
|
|
72
|
-
model: str = "perplexity/sonar-small-online",
|
|
73
|
-
structured_output: bool = False,
|
|
74
|
-
return_images: bool = False,
|
|
75
|
-
) -> AsyncGenerator[PerplexitySearchEvent, None]:
|
|
76
|
-
"""
|
|
77
|
-
使用 OpenRouter 提供的 Perplexity API 服務
|
|
78
|
-
structured_output: 只有在 stream 為 False 時有效
|
|
79
|
-
"""
|
|
80
|
-
# 確保模型是 Perplexity 的模型
|
|
81
|
-
if not model.startswith("perplexity/"):
|
|
82
|
-
model = "perplexity/sonar-small-online"
|
|
83
|
-
|
|
84
|
-
api_key = os.getenv("OPENROUTER_API_KEY")
|
|
85
|
-
if not api_key:
|
|
86
|
-
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
|
87
|
-
|
|
88
|
-
headers = {
|
|
89
|
-
"Authorization": f"Bearer {api_key}",
|
|
90
|
-
"Content-Type": "application/json",
|
|
91
|
-
"HTTP-Referer": "https://openrouter.ai/api/v1", # OpenRouter 需要提供來源
|
|
92
|
-
"X-Title": "BotRun Flow Lang", # 可選的應用名稱
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
messages = deepcopy(messages_for_llm)
|
|
96
|
-
if len(messages) > 0 and messages[-1]["role"] == "user":
|
|
97
|
-
messages.pop()
|
|
98
|
-
if user_prompt_prefix:
|
|
99
|
-
xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
|
|
100
|
-
messages.append(
|
|
101
|
-
{"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
|
|
102
|
-
)
|
|
103
|
-
else:
|
|
104
|
-
messages.append({"role": "user", "content": input_content})
|
|
105
|
-
|
|
106
|
-
filtered_domain_filter = []
|
|
107
|
-
for domain in domain_filter:
|
|
108
|
-
if domain and is_valid_domain(domain):
|
|
109
|
-
filtered_domain_filter.append(domain)
|
|
110
|
-
|
|
111
|
-
payload = {
|
|
112
|
-
"model": model,
|
|
113
|
-
"messages": messages,
|
|
114
|
-
"temperature": 0.5,
|
|
115
|
-
"stream": stream,
|
|
116
|
-
# OpenRouter 可能不支持 search_domain_filter 參數,如果有問題可以移除
|
|
117
|
-
"search_domain_filter": filtered_domain_filter,
|
|
118
|
-
"stream_usage": True,
|
|
119
|
-
"return_images": return_images,
|
|
120
|
-
# "reasoning_effort": "high",
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
try:
|
|
124
|
-
input_token = 0
|
|
125
|
-
output_token = 0
|
|
126
|
-
async with aiohttp.ClientSession() as session:
|
|
127
|
-
async with session.post(
|
|
128
|
-
OPENROUTER_API_URL, headers=headers, json=payload
|
|
129
|
-
) as response:
|
|
130
|
-
if response.status != 200:
|
|
131
|
-
error_text = await response.text()
|
|
132
|
-
raise ValueError(f"OpenRouter API error: {error_text}")
|
|
133
|
-
|
|
134
|
-
if not stream:
|
|
135
|
-
# 非串流模式的處理
|
|
136
|
-
response_data = await response.json()
|
|
137
|
-
content = response_data["choices"][0]["message"]["content"]
|
|
138
|
-
content = remove_citation_number_from_content(content)
|
|
139
|
-
if not structured_output:
|
|
140
|
-
yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
|
|
141
|
-
|
|
142
|
-
# 處理引用 (如果 OpenRouter 返回引用)
|
|
143
|
-
citations = response_data.get("citations", [])
|
|
144
|
-
final_citations = [
|
|
145
|
-
citation
|
|
146
|
-
for citation in citations
|
|
147
|
-
if should_include_citation(citation, domain_filter)
|
|
148
|
-
]
|
|
149
|
-
images = response_data.get("images", [])
|
|
150
|
-
|
|
151
|
-
if final_citations:
|
|
152
|
-
references = f"\n\n參考來源:\n"
|
|
153
|
-
for citation in final_citations:
|
|
154
|
-
references += f"- {citation}\n"
|
|
155
|
-
if not structured_output:
|
|
156
|
-
yield PerplexitySearchEvent(chunk=references)
|
|
157
|
-
|
|
158
|
-
if structured_output:
|
|
159
|
-
yield PerplexitySearchEvent(
|
|
160
|
-
chunk=json.dumps(
|
|
161
|
-
{
|
|
162
|
-
"content": content,
|
|
163
|
-
"citations": final_citations,
|
|
164
|
-
"images": images,
|
|
165
|
-
}
|
|
166
|
-
),
|
|
167
|
-
raw_json=response_data,
|
|
168
|
-
)
|
|
169
|
-
else:
|
|
170
|
-
# 串流模式的處理
|
|
171
|
-
full_response = ""
|
|
172
|
-
final_citations = []
|
|
173
|
-
async for line in response.content:
|
|
174
|
-
if line:
|
|
175
|
-
line = line.decode("utf-8").strip()
|
|
176
|
-
if line.startswith("data: "):
|
|
177
|
-
line = line[6:] # Remove 'data: ' prefix
|
|
178
|
-
if line == "[DONE]":
|
|
179
|
-
break
|
|
180
|
-
|
|
181
|
-
try:
|
|
182
|
-
chunk_data = json.loads(line)
|
|
183
|
-
response_data = chunk_data
|
|
184
|
-
|
|
185
|
-
if (
|
|
186
|
-
chunk_data["choices"][0]
|
|
187
|
-
.get("delta", {})
|
|
188
|
-
.get("content")
|
|
189
|
-
):
|
|
190
|
-
content = chunk_data["choices"][0]["delta"][
|
|
191
|
-
"content"
|
|
192
|
-
]
|
|
193
|
-
full_response += content
|
|
194
|
-
yield PerplexitySearchEvent(
|
|
195
|
-
chunk=content,
|
|
196
|
-
raw_json=chunk_data,
|
|
197
|
-
)
|
|
198
|
-
if not final_citations and chunk_data.get(
|
|
199
|
-
"citations", []
|
|
200
|
-
):
|
|
201
|
-
citations = chunk_data.get("citations", [])
|
|
202
|
-
final_citations = [
|
|
203
|
-
citation
|
|
204
|
-
for citation in citations
|
|
205
|
-
if should_include_citation(
|
|
206
|
-
citation, domain_filter
|
|
207
|
-
)
|
|
208
|
-
]
|
|
209
|
-
|
|
210
|
-
except json.JSONDecodeError:
|
|
211
|
-
continue
|
|
212
|
-
|
|
213
|
-
# 只在有符合條件的 citations 時才產生參考文獻
|
|
214
|
-
if final_citations:
|
|
215
|
-
references = f"\n\n參考來源:\n"
|
|
216
|
-
for citation in final_citations:
|
|
217
|
-
references += f"- {citation}\n"
|
|
218
|
-
yield PerplexitySearchEvent(chunk=references)
|
|
219
|
-
|
|
220
|
-
if response_data.get("usage"):
|
|
221
|
-
logging.info(
|
|
222
|
-
f"perplexity_search_openrouter============> input_token: {response_data['usage'].get('prompt_tokens', 0) + response_data['usage'].get('citation_tokens', 0)}, output_token: {response_data['usage'].get('completion_tokens', 0)}",
|
|
223
|
-
)
|
|
224
|
-
except Exception as e:
|
|
225
|
-
import traceback
|
|
226
|
-
|
|
227
|
-
traceback.print_exc()
|
|
228
|
-
print(e)
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
async def respond_with_perplexity_search(
|
|
232
|
-
input_content,
|
|
233
|
-
user_prompt_prefix,
|
|
234
|
-
messages_for_llm,
|
|
235
|
-
domain_filter: list[str],
|
|
236
|
-
stream: bool = False,
|
|
237
|
-
model: str = "sonar-reasoning-pro",
|
|
238
|
-
structured_output: bool = False,
|
|
239
|
-
return_images: bool = False,
|
|
240
|
-
) -> AsyncGenerator[PerplexitySearchEvent, None]:
|
|
241
|
-
"""
|
|
242
|
-
structured_output: 只有在 stream 為 False 時有效
|
|
243
|
-
return_images: 是否返回圖片,但是 openrouter 不支援
|
|
244
|
-
"""
|
|
245
|
-
# 檢查是否使用 OpenRouter
|
|
246
|
-
is_use_openrouter = os.getenv("OPENROUTER_API_KEY") and os.getenv(
|
|
247
|
-
"OPENROUTER_BASE_URL"
|
|
248
|
-
)
|
|
249
|
-
if return_images:
|
|
250
|
-
# if os.getenv("PPLX_API_KEY", "") == "":
|
|
251
|
-
# raise ValueError(
|
|
252
|
-
# "PPLX_API_KEY environment variable not set, return_images needs PPLX_API_KEY"
|
|
253
|
-
# )
|
|
254
|
-
# Openrouter 尚不支援 return_images
|
|
255
|
-
is_use_openrouter = False
|
|
256
|
-
|
|
257
|
-
if is_use_openrouter:
|
|
258
|
-
# 若使用 OpenRouter,轉換模型名稱並呼叫 OpenRouter 版本的函數
|
|
259
|
-
openrouter_model = "perplexity/sonar-reasoning-pro"
|
|
260
|
-
if model == "sonar-reasoning-pro":
|
|
261
|
-
openrouter_model = "perplexity/sonar-reasoning-pro"
|
|
262
|
-
elif model == "sonar-reasoning":
|
|
263
|
-
openrouter_model = "perplexity/sonar-reasoning"
|
|
264
|
-
elif model == "sonar-pro":
|
|
265
|
-
openrouter_model = "perplexity/sonar-pro"
|
|
266
|
-
elif model == "sonar":
|
|
267
|
-
openrouter_model = "perplexity/sonar"
|
|
268
|
-
|
|
269
|
-
async for event in respond_with_perplexity_search_openrouter(
|
|
270
|
-
input_content,
|
|
271
|
-
user_prompt_prefix,
|
|
272
|
-
messages_for_llm,
|
|
273
|
-
domain_filter,
|
|
274
|
-
stream,
|
|
275
|
-
openrouter_model,
|
|
276
|
-
structured_output,
|
|
277
|
-
):
|
|
278
|
-
yield event
|
|
279
|
-
return
|
|
280
|
-
|
|
281
|
-
# 以下是原有的邏輯
|
|
282
|
-
if model not in ["sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar"]:
|
|
283
|
-
model = "sonar-reasoning-pro"
|
|
284
|
-
api_key = os.getenv("PPLX_API_KEY")
|
|
285
|
-
if not api_key:
|
|
286
|
-
raise ValueError("PPLX_API_KEY environment variable not set")
|
|
287
|
-
|
|
288
|
-
headers = {
|
|
289
|
-
"Authorization": f"Bearer {api_key}",
|
|
290
|
-
"Content-Type": "application/json",
|
|
291
|
-
}
|
|
292
|
-
messages = deepcopy(messages_for_llm)
|
|
293
|
-
if len(messages) > 0 and messages[-1]["role"] == "user":
|
|
294
|
-
messages.pop()
|
|
295
|
-
if user_prompt_prefix:
|
|
296
|
-
xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
|
|
297
|
-
messages.append(
|
|
298
|
-
{"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
|
|
299
|
-
)
|
|
300
|
-
else:
|
|
301
|
-
messages.append({"role": "user", "content": input_content})
|
|
302
|
-
filtered_domain_filter = []
|
|
303
|
-
|
|
304
|
-
for domain in domain_filter:
|
|
305
|
-
if domain and is_valid_domain(domain):
|
|
306
|
-
filtered_domain_filter.append(domain)
|
|
307
|
-
|
|
308
|
-
payload = {
|
|
309
|
-
"model": model,
|
|
310
|
-
"messages": messages,
|
|
311
|
-
"temperature": 0.5,
|
|
312
|
-
"stream": stream,
|
|
313
|
-
"search_domain_filter": filtered_domain_filter,
|
|
314
|
-
"stream_usage": True,
|
|
315
|
-
"return_images": return_images,
|
|
316
|
-
|
|
317
|
-
}
|
|
318
|
-
try:
|
|
319
|
-
input_token = 0
|
|
320
|
-
output_token = 0
|
|
321
|
-
async with aiohttp.ClientSession() as session:
|
|
322
|
-
async with session.post(
|
|
323
|
-
PERPLEXITY_API_URL, headers=headers, json=payload
|
|
324
|
-
) as response:
|
|
325
|
-
if response.status != 200:
|
|
326
|
-
error_text = await response.text()
|
|
327
|
-
raise ValueError(f"Perplexity API error: {error_text}")
|
|
328
|
-
|
|
329
|
-
if not stream:
|
|
330
|
-
# 非串流模式的處理
|
|
331
|
-
response_data = await response.json()
|
|
332
|
-
content = response_data["choices"][0]["message"]["content"]
|
|
333
|
-
content = remove_citation_number_from_content(content)
|
|
334
|
-
if not structured_output:
|
|
335
|
-
yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
|
|
336
|
-
|
|
337
|
-
# 處理引用
|
|
338
|
-
citations = response_data.get("citations", [])
|
|
339
|
-
final_citations = [
|
|
340
|
-
citation
|
|
341
|
-
for citation in citations
|
|
342
|
-
if should_include_citation(citation, domain_filter)
|
|
343
|
-
]
|
|
344
|
-
images = response_data.get("images", [])
|
|
345
|
-
|
|
346
|
-
if final_citations:
|
|
347
|
-
references = f"\n\n參考來源:\n"
|
|
348
|
-
for citation in final_citations:
|
|
349
|
-
references += f"- {citation}\n"
|
|
350
|
-
if not structured_output:
|
|
351
|
-
yield PerplexitySearchEvent(chunk=references)
|
|
352
|
-
|
|
353
|
-
if structured_output:
|
|
354
|
-
yield PerplexitySearchEvent(
|
|
355
|
-
chunk=json.dumps(
|
|
356
|
-
{
|
|
357
|
-
"content": content,
|
|
358
|
-
"citations": final_citations,
|
|
359
|
-
"images": images,
|
|
360
|
-
}
|
|
361
|
-
),
|
|
362
|
-
raw_json=response_data,
|
|
363
|
-
)
|
|
364
|
-
|
|
365
|
-
# 串流模式的處理
|
|
366
|
-
full_response = ""
|
|
367
|
-
final_citations = []
|
|
368
|
-
async for line in response.content:
|
|
369
|
-
if line:
|
|
370
|
-
line = line.decode("utf-8").strip()
|
|
371
|
-
if line.startswith("data: "):
|
|
372
|
-
line = line[6:] # Remove 'data: ' prefix
|
|
373
|
-
if line == "[DONE]":
|
|
374
|
-
break
|
|
375
|
-
|
|
376
|
-
try:
|
|
377
|
-
chunk_data = json.loads(line)
|
|
378
|
-
response_data = chunk_data
|
|
379
|
-
# print(chunk_data)
|
|
380
|
-
if (
|
|
381
|
-
chunk_data["choices"][0]
|
|
382
|
-
.get("delta", {})
|
|
383
|
-
.get("content")
|
|
384
|
-
):
|
|
385
|
-
content = chunk_data["choices"][0]["delta"][
|
|
386
|
-
"content"
|
|
387
|
-
]
|
|
388
|
-
full_response += content
|
|
389
|
-
yield PerplexitySearchEvent(
|
|
390
|
-
chunk=content,
|
|
391
|
-
raw_json=chunk_data,
|
|
392
|
-
)
|
|
393
|
-
if not final_citations and chunk_data.get(
|
|
394
|
-
"citations", []
|
|
395
|
-
):
|
|
396
|
-
# 發現 perplexity 不會都有 finish_reason 為 stop 的狀況,但是 citations 會有
|
|
397
|
-
# if chunk_data["choices"][0]["finish_reason"] == "stop":
|
|
398
|
-
citations = chunk_data.get("citations", [])
|
|
399
|
-
final_citations = [
|
|
400
|
-
citation
|
|
401
|
-
for citation in citations
|
|
402
|
-
if should_include_citation(
|
|
403
|
-
citation, domain_filter
|
|
404
|
-
)
|
|
405
|
-
]
|
|
406
|
-
|
|
407
|
-
except json.JSONDecodeError:
|
|
408
|
-
continue
|
|
409
|
-
|
|
410
|
-
# 只在有符合條件的 citations 時才產生參考文獻
|
|
411
|
-
if final_citations:
|
|
412
|
-
references = f"\n\n參考來源:\n"
|
|
413
|
-
for citation in final_citations:
|
|
414
|
-
references += f"- {citation}\n"
|
|
415
|
-
yield PerplexitySearchEvent(chunk=references)
|
|
416
|
-
# 安全地存取 usage 資訊,避免鍵不存在的錯誤
|
|
417
|
-
if response_data and "usage" in response_data:
|
|
418
|
-
usage = response_data["usage"]
|
|
419
|
-
prompt_tokens = usage.get("prompt_tokens", 0)
|
|
420
|
-
citation_tokens = usage.get("citation_tokens", 0)
|
|
421
|
-
completion_tokens = usage.get("completion_tokens", 0)
|
|
422
|
-
logging.info(
|
|
423
|
-
f"perplexity_search============> input_token: {prompt_tokens + citation_tokens}, output_token: {completion_tokens}",
|
|
424
|
-
)
|
|
425
|
-
else:
|
|
426
|
-
logging.info("perplexity_search============> usage information not available")
|
|
427
|
-
except Exception as e:
|
|
428
|
-
import traceback
|
|
429
|
-
|
|
430
|
-
traceback.print_exc()
|
|
431
|
-
print(e)
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
def remove_citation_number_from_content(content: str) -> str:
|
|
435
|
-
"""
|
|
436
|
-
移除文字裡的 [1]、[2]、[3] 等數字
|
|
437
|
-
"""
|
|
438
|
-
return re.sub(r"\[[0-9]+\]", "", content)
|
|
439
|
-
# answer_message = await cl.Message(content="").send()
|
|
440
|
-
# full_response = ""
|
|
441
|
-
# for response in responses:
|
|
442
|
-
# if response.candidates[0].finish_reason != Candidate.FinishReason.STOP:
|
|
443
|
-
# # await answer_message.stream_token(response.text)
|
|
444
|
-
# yield GeminiGroundingEvent(chunk=response.text)
|
|
445
|
-
# full_response += response.text
|
|
446
|
-
# if response.candidates[0].grounding_metadata:
|
|
447
|
-
# if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
|
|
448
|
-
# references = f"\n\n{tr('Sources:')}\n"
|
|
449
|
-
# for grounding_chunk in response.candidates[
|
|
450
|
-
# 0
|
|
451
|
-
# ].grounding_metadata.grounding_chunks:
|
|
452
|
-
# references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
|
|
453
|
-
# # await answer_message.stream_token(references)
|
|
454
|
-
# yield GeminiGroundingEvent(chunk=references)
|
|
455
|
-
# else:
|
|
456
|
-
# if response.candidates[0].grounding_metadata:
|
|
457
|
-
# if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
|
|
458
|
-
# references = f"\n\n{tr('Sources:')}\n"
|
|
459
|
-
# for grounding_chunk in response.candidates[
|
|
460
|
-
# 0
|
|
461
|
-
# ].grounding_metadata.grounding_chunks:
|
|
462
|
-
# references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
|
|
463
|
-
# # await answer_message.stream_token(references)
|
|
464
|
-
# yield GeminiGroundingEvent(chunk=references)
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
from typing import AsyncGenerator
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
import os
|
|
5
|
+
import json
|
|
6
|
+
import aiohttp
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
import re
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
load_dotenv()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PerplexitySearchEvent(BaseModel):
|
|
15
|
+
chunk: str
|
|
16
|
+
raw_json: dict | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
|
20
|
+
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def should_include_citation(citation: str, domain_filter: list[str]) -> bool:
|
|
24
|
+
# 如果沒有任何過濾規則,接受所有網站
|
|
25
|
+
if not domain_filter:
|
|
26
|
+
return True
|
|
27
|
+
|
|
28
|
+
# 分離排除規則和包含規則
|
|
29
|
+
exclude_rules = [
|
|
30
|
+
rule[1:].replace("*.", "") for rule in domain_filter if rule.startswith("-")
|
|
31
|
+
]
|
|
32
|
+
include_rules = [
|
|
33
|
+
rule.replace("*.", "") for rule in domain_filter if not rule.startswith("-")
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# 檢查是否符合任何排除規則
|
|
37
|
+
for pattern in exclude_rules:
|
|
38
|
+
if pattern in citation:
|
|
39
|
+
return False
|
|
40
|
+
|
|
41
|
+
# 如果沒有包含規則,且通過了排除規則檢查,就接受該網站
|
|
42
|
+
if not include_rules:
|
|
43
|
+
return True
|
|
44
|
+
|
|
45
|
+
# 如果有包含規則,必須符合至少一個
|
|
46
|
+
for pattern in include_rules:
|
|
47
|
+
if pattern in citation:
|
|
48
|
+
return True
|
|
49
|
+
|
|
50
|
+
return False
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def is_valid_domain(domain: str) -> bool:
|
|
54
|
+
if not domain or "*." in domain:
|
|
55
|
+
return False
|
|
56
|
+
|
|
57
|
+
# 只允許包含 ://、.、% 和英數字的網址
|
|
58
|
+
# ^ 表示開頭,$ 表示結尾
|
|
59
|
+
# [a-zA-Z0-9] 表示英數字
|
|
60
|
+
# [\\.\\:\\/\\%] 表示允許的特殊字符
|
|
61
|
+
pattern = r"^[a-zA-Z0-9\\.\\:\\/\\%]+$"
|
|
62
|
+
|
|
63
|
+
return bool(re.match(pattern, domain))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def respond_with_perplexity_search_openrouter(
|
|
67
|
+
input_content,
|
|
68
|
+
user_prompt_prefix,
|
|
69
|
+
messages_for_llm,
|
|
70
|
+
domain_filter: list[str],
|
|
71
|
+
stream: bool = False,
|
|
72
|
+
model: str = "perplexity/sonar-small-online",
|
|
73
|
+
structured_output: bool = False,
|
|
74
|
+
return_images: bool = False,
|
|
75
|
+
) -> AsyncGenerator[PerplexitySearchEvent, None]:
|
|
76
|
+
"""
|
|
77
|
+
使用 OpenRouter 提供的 Perplexity API 服務
|
|
78
|
+
structured_output: 只有在 stream 為 False 時有效
|
|
79
|
+
"""
|
|
80
|
+
# 確保模型是 Perplexity 的模型
|
|
81
|
+
if not model.startswith("perplexity/"):
|
|
82
|
+
model = "perplexity/sonar-small-online"
|
|
83
|
+
|
|
84
|
+
api_key = os.getenv("OPENROUTER_API_KEY")
|
|
85
|
+
if not api_key:
|
|
86
|
+
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
|
87
|
+
|
|
88
|
+
headers = {
|
|
89
|
+
"Authorization": f"Bearer {api_key}",
|
|
90
|
+
"Content-Type": "application/json",
|
|
91
|
+
"HTTP-Referer": "https://openrouter.ai/api/v1", # OpenRouter 需要提供來源
|
|
92
|
+
"X-Title": "BotRun Flow Lang", # 可選的應用名稱
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
messages = deepcopy(messages_for_llm)
|
|
96
|
+
if len(messages) > 0 and messages[-1]["role"] == "user":
|
|
97
|
+
messages.pop()
|
|
98
|
+
if user_prompt_prefix:
|
|
99
|
+
xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
|
|
100
|
+
messages.append(
|
|
101
|
+
{"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
|
|
102
|
+
)
|
|
103
|
+
else:
|
|
104
|
+
messages.append({"role": "user", "content": input_content})
|
|
105
|
+
|
|
106
|
+
filtered_domain_filter = []
|
|
107
|
+
for domain in domain_filter:
|
|
108
|
+
if domain and is_valid_domain(domain):
|
|
109
|
+
filtered_domain_filter.append(domain)
|
|
110
|
+
|
|
111
|
+
payload = {
|
|
112
|
+
"model": model,
|
|
113
|
+
"messages": messages,
|
|
114
|
+
"temperature": 0.5,
|
|
115
|
+
"stream": stream,
|
|
116
|
+
# OpenRouter 可能不支持 search_domain_filter 參數,如果有問題可以移除
|
|
117
|
+
"search_domain_filter": filtered_domain_filter,
|
|
118
|
+
"stream_usage": True,
|
|
119
|
+
"return_images": return_images,
|
|
120
|
+
# "reasoning_effort": "high",
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
try:
|
|
124
|
+
input_token = 0
|
|
125
|
+
output_token = 0
|
|
126
|
+
async with aiohttp.ClientSession() as session:
|
|
127
|
+
async with session.post(
|
|
128
|
+
OPENROUTER_API_URL, headers=headers, json=payload
|
|
129
|
+
) as response:
|
|
130
|
+
if response.status != 200:
|
|
131
|
+
error_text = await response.text()
|
|
132
|
+
raise ValueError(f"OpenRouter API error: {error_text}")
|
|
133
|
+
|
|
134
|
+
if not stream:
|
|
135
|
+
# 非串流模式的處理
|
|
136
|
+
response_data = await response.json()
|
|
137
|
+
content = response_data["choices"][0]["message"]["content"]
|
|
138
|
+
content = remove_citation_number_from_content(content)
|
|
139
|
+
if not structured_output:
|
|
140
|
+
yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
|
|
141
|
+
|
|
142
|
+
# 處理引用 (如果 OpenRouter 返回引用)
|
|
143
|
+
citations = response_data.get("citations", [])
|
|
144
|
+
final_citations = [
|
|
145
|
+
citation
|
|
146
|
+
for citation in citations
|
|
147
|
+
if should_include_citation(citation, domain_filter)
|
|
148
|
+
]
|
|
149
|
+
images = response_data.get("images", [])
|
|
150
|
+
|
|
151
|
+
if final_citations:
|
|
152
|
+
references = f"\n\n參考來源:\n"
|
|
153
|
+
for citation in final_citations:
|
|
154
|
+
references += f"- {citation}\n"
|
|
155
|
+
if not structured_output:
|
|
156
|
+
yield PerplexitySearchEvent(chunk=references)
|
|
157
|
+
|
|
158
|
+
if structured_output:
|
|
159
|
+
yield PerplexitySearchEvent(
|
|
160
|
+
chunk=json.dumps(
|
|
161
|
+
{
|
|
162
|
+
"content": content,
|
|
163
|
+
"citations": final_citations,
|
|
164
|
+
"images": images,
|
|
165
|
+
}
|
|
166
|
+
),
|
|
167
|
+
raw_json=response_data,
|
|
168
|
+
)
|
|
169
|
+
else:
|
|
170
|
+
# 串流模式的處理
|
|
171
|
+
full_response = ""
|
|
172
|
+
final_citations = []
|
|
173
|
+
async for line in response.content:
|
|
174
|
+
if line:
|
|
175
|
+
line = line.decode("utf-8").strip()
|
|
176
|
+
if line.startswith("data: "):
|
|
177
|
+
line = line[6:] # Remove 'data: ' prefix
|
|
178
|
+
if line == "[DONE]":
|
|
179
|
+
break
|
|
180
|
+
|
|
181
|
+
try:
|
|
182
|
+
chunk_data = json.loads(line)
|
|
183
|
+
response_data = chunk_data
|
|
184
|
+
|
|
185
|
+
if (
|
|
186
|
+
chunk_data["choices"][0]
|
|
187
|
+
.get("delta", {})
|
|
188
|
+
.get("content")
|
|
189
|
+
):
|
|
190
|
+
content = chunk_data["choices"][0]["delta"][
|
|
191
|
+
"content"
|
|
192
|
+
]
|
|
193
|
+
full_response += content
|
|
194
|
+
yield PerplexitySearchEvent(
|
|
195
|
+
chunk=content,
|
|
196
|
+
raw_json=chunk_data,
|
|
197
|
+
)
|
|
198
|
+
if not final_citations and chunk_data.get(
|
|
199
|
+
"citations", []
|
|
200
|
+
):
|
|
201
|
+
citations = chunk_data.get("citations", [])
|
|
202
|
+
final_citations = [
|
|
203
|
+
citation
|
|
204
|
+
for citation in citations
|
|
205
|
+
if should_include_citation(
|
|
206
|
+
citation, domain_filter
|
|
207
|
+
)
|
|
208
|
+
]
|
|
209
|
+
|
|
210
|
+
except json.JSONDecodeError:
|
|
211
|
+
continue
|
|
212
|
+
|
|
213
|
+
# 只在有符合條件的 citations 時才產生參考文獻
|
|
214
|
+
if final_citations:
|
|
215
|
+
references = f"\n\n參考來源:\n"
|
|
216
|
+
for citation in final_citations:
|
|
217
|
+
references += f"- {citation}\n"
|
|
218
|
+
yield PerplexitySearchEvent(chunk=references)
|
|
219
|
+
|
|
220
|
+
if response_data.get("usage"):
|
|
221
|
+
logging.info(
|
|
222
|
+
f"perplexity_search_openrouter============> input_token: {response_data['usage'].get('prompt_tokens', 0) + response_data['usage'].get('citation_tokens', 0)}, output_token: {response_data['usage'].get('completion_tokens', 0)}",
|
|
223
|
+
)
|
|
224
|
+
except Exception as e:
|
|
225
|
+
import traceback
|
|
226
|
+
|
|
227
|
+
traceback.print_exc()
|
|
228
|
+
print(e)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
async def respond_with_perplexity_search(
    input_content,
    user_prompt_prefix,
    messages_for_llm,
    domain_filter: list[str],
    stream: bool = False,
    model: str = "sonar-reasoning-pro",
    structured_output: bool = False,
    return_images: bool = False,
) -> AsyncGenerator[PerplexitySearchEvent, None]:
    """Query Perplexity (directly or via OpenRouter) and yield search events.

    Args:
        input_content: The user's question; appended as the final user message.
        user_prompt_prefix: Optional text prepended to the question (the
            question is then wrapped in <使用者提問> tags).
        messages_for_llm: Prior conversation; a trailing user message is
            replaced by the one built here. The caller's list is not mutated.
        domain_filter: Domains used both as Perplexity's search filter and to
            filter returned citations.
        stream: When True, yield content chunks as they arrive (SSE).
        model: One of the Perplexity "sonar" models; unknown values fall back
            to "sonar-reasoning-pro".
        structured_output: Only effective when stream is False — the result is
            yielded as one JSON chunk with content/citations/images.
        return_images: Whether to request images. OpenRouter does not support
            this, so it forces the direct Perplexity API path.

    Yields:
        PerplexitySearchEvent objects (content chunks, then a references
        chunk when filtered citations exist).

    Raises:
        ValueError: When PPLX_API_KEY is missing (direct path) or the API
            returns a non-200 status. NOTE: errors raised inside the request
            loop are caught, logged, and swallowed — the stream just ends.
    """
    # Use OpenRouter only when both credentials are configured.
    is_use_openrouter = os.getenv("OPENROUTER_API_KEY") and os.getenv(
        "OPENROUTER_BASE_URL"
    )
    if return_images:
        # OpenRouter does not support return_images yet, so force the
        # direct Perplexity API path.
        is_use_openrouter = False

    if is_use_openrouter:
        # Map the Perplexity model name to its OpenRouter equivalent;
        # unknown names fall back to the reasoning-pro model.
        openrouter_model = {
            "sonar-reasoning-pro": "perplexity/sonar-reasoning-pro",
            "sonar-reasoning": "perplexity/sonar-reasoning",
            "sonar-pro": "perplexity/sonar-pro",
            "sonar": "perplexity/sonar",
        }.get(model, "perplexity/sonar-reasoning-pro")

        async for event in respond_with_perplexity_search_openrouter(
            input_content,
            user_prompt_prefix,
            messages_for_llm,
            domain_filter,
            stream,
            openrouter_model,
            structured_output,
        ):
            yield event
        return

    # Direct Perplexity API path.
    if model not in ["sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar"]:
        model = "sonar-reasoning-pro"
    api_key = os.getenv("PPLX_API_KEY")
    if not api_key:
        raise ValueError("PPLX_API_KEY environment variable not set")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # Work on a copy so the caller's message list is never mutated; drop a
    # trailing user message and replace it with one built from input_content.
    messages = deepcopy(messages_for_llm)
    if len(messages) > 0 and messages[-1]["role"] == "user":
        messages.pop()
    if user_prompt_prefix:
        xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
        messages.append(
            {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
        )
    else:
        messages.append({"role": "user", "content": input_content})

    # Keep only non-empty, syntactically valid domains for the search filter.
    filtered_domain_filter = [
        domain for domain in domain_filter if domain and is_valid_domain(domain)
    ]

    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.5,
        "stream": stream,
        "search_domain_filter": filtered_domain_filter,
        "stream_usage": True,
        "return_images": return_images,
    }
    try:
        # Initialized up front: in streaming mode no chunk may ever arrive,
        # and the usage-logging block below must not hit an unbound name.
        response_data = None
        async with aiohttp.ClientSession() as session:
            async with session.post(
                PERPLEXITY_API_URL, headers=headers, json=payload
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise ValueError(f"Perplexity API error: {error_text}")

                if not stream:
                    # Non-streaming: single JSON body.
                    response_data = await response.json()
                    content = response_data["choices"][0]["message"]["content"]
                    content = remove_citation_number_from_content(content)
                    if not structured_output:
                        yield PerplexitySearchEvent(chunk=content, raw_json=response_data)

                    # Keep only citations that pass the domain filter.
                    citations = response_data.get("citations", [])
                    final_citations = [
                        citation
                        for citation in citations
                        if should_include_citation(citation, domain_filter)
                    ]
                    images = response_data.get("images", [])

                    if final_citations:
                        references = f"\n\n參考來源:\n"
                        for citation in final_citations:
                            references += f"- {citation}\n"
                        if not structured_output:
                            yield PerplexitySearchEvent(chunk=references)

                    if structured_output:
                        yield PerplexitySearchEvent(
                            chunk=json.dumps(
                                {
                                    "content": content,
                                    "citations": final_citations,
                                    "images": images,
                                }
                            ),
                            raw_json=response_data,
                        )
                else:
                    # Streaming: parse SSE lines prefixed with "data: ".
                    final_citations = []
                    async for line in response.content:
                        if line:
                            line = line.decode("utf-8").strip()
                            if line.startswith("data: "):
                                line = line[6:]  # Remove 'data: ' prefix
                            if line == "[DONE]":
                                break

                            try:
                                chunk_data = json.loads(line)
                                # Remember the last chunk for usage logging.
                                response_data = chunk_data
                                delta_content = (
                                    chunk_data["choices"][0]
                                    .get("delta", {})
                                    .get("content")
                                )
                                if delta_content:
                                    yield PerplexitySearchEvent(
                                        chunk=delta_content,
                                        raw_json=chunk_data,
                                    )
                                if not final_citations and chunk_data.get(
                                    "citations", []
                                ):
                                    # Perplexity does not reliably send a
                                    # finish_reason of "stop", but citations
                                    # do appear on late chunks, so capture
                                    # them as soon as they show up.
                                    citations = chunk_data.get("citations", [])
                                    final_citations = [
                                        citation
                                        for citation in citations
                                        if should_include_citation(
                                            citation, domain_filter
                                        )
                                    ]
                            except json.JSONDecodeError:
                                # Skip keep-alive / partial lines.
                                continue

                    # Emit references only when citations survived filtering.
                    if final_citations:
                        references = f"\n\n參考來源:\n"
                        for citation in final_citations:
                            references += f"- {citation}\n"
                        yield PerplexitySearchEvent(chunk=references)

                # Usage may be absent (e.g. some streamed responses), so
                # access it defensively.
                if response_data and "usage" in response_data:
                    usage = response_data["usage"]
                    prompt_tokens = usage.get("prompt_tokens", 0)
                    citation_tokens = usage.get("citation_tokens", 0)
                    completion_tokens = usage.get("completion_tokens", 0)
                    logging.info(
                        f"perplexity_search============> input_token: {prompt_tokens + citation_tokens}, output_token: {completion_tokens}",
                    )
                else:
                    logging.info("perplexity_search============> usage information not available")
    except Exception as e:
        # Best-effort: log and swallow so the caller's async stream simply
        # ends instead of propagating (preserves existing behaviour).
        import traceback

        traceback.print_exc()
        print(e)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def remove_citation_number_from_content(content: str) -> str:
    """Strip inline citation markers such as "[1]", "[2]" from *content*.

    Only bracketed runs of ASCII digits are removed; any other bracketed
    text (e.g. "[abc]") is left untouched.
    """
    citation_marker = re.compile(r"\[[0-9]+\]")
    return citation_marker.sub("", content)
|
|
439
|
+
# answer_message = await cl.Message(content="").send()
|
|
440
|
+
# full_response = ""
|
|
441
|
+
# for response in responses:
|
|
442
|
+
# if response.candidates[0].finish_reason != Candidate.FinishReason.STOP:
|
|
443
|
+
# # await answer_message.stream_token(response.text)
|
|
444
|
+
# yield GeminiGroundingEvent(chunk=response.text)
|
|
445
|
+
# full_response += response.text
|
|
446
|
+
# if response.candidates[0].grounding_metadata:
|
|
447
|
+
# if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
|
|
448
|
+
# references = f"\n\n{tr('Sources:')}\n"
|
|
449
|
+
# for grounding_chunk in response.candidates[
|
|
450
|
+
# 0
|
|
451
|
+
# ].grounding_metadata.grounding_chunks:
|
|
452
|
+
# references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
|
|
453
|
+
# # await answer_message.stream_token(references)
|
|
454
|
+
# yield GeminiGroundingEvent(chunk=references)
|
|
455
|
+
# else:
|
|
456
|
+
# if response.candidates[0].grounding_metadata:
|
|
457
|
+
# if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
|
|
458
|
+
# references = f"\n\n{tr('Sources:')}\n"
|
|
459
|
+
# for grounding_chunk in response.candidates[
|
|
460
|
+
# 0
|
|
461
|
+
# ].grounding_metadata.grounding_chunks:
|
|
462
|
+
# references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
|
|
463
|
+
# # await answer_message.stream_token(references)
|
|
464
|
+
# yield GeminiGroundingEvent(chunk=references)
|