botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. botrun_flow_lang/api/auth_api.py +39 -39
  2. botrun_flow_lang/api/auth_utils.py +183 -183
  3. botrun_flow_lang/api/botrun_back_api.py +65 -65
  4. botrun_flow_lang/api/flow_api.py +3 -3
  5. botrun_flow_lang/api/hatch_api.py +508 -508
  6. botrun_flow_lang/api/langgraph_api.py +811 -811
  7. botrun_flow_lang/api/line_bot_api.py +1484 -1484
  8. botrun_flow_lang/api/model_api.py +300 -300
  9. botrun_flow_lang/api/rate_limit_api.py +32 -32
  10. botrun_flow_lang/api/routes.py +79 -79
  11. botrun_flow_lang/api/search_api.py +53 -53
  12. botrun_flow_lang/api/storage_api.py +395 -395
  13. botrun_flow_lang/api/subsidy_api.py +290 -290
  14. botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
  15. botrun_flow_lang/api/user_setting_api.py +70 -70
  16. botrun_flow_lang/api/version_api.py +31 -31
  17. botrun_flow_lang/api/youtube_api.py +26 -26
  18. botrun_flow_lang/constants.py +13 -13
  19. botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
  20. botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
  21. botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
  22. botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
  23. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
  24. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
  25. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
  26. botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +723 -723
  27. botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
  28. botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
  29. botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
  30. botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
  31. botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
  32. botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
  33. botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
  34. botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
  35. botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
  36. botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +486 -486
  37. botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
  38. botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
  39. botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
  40. botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
  41. botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
  42. botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
  43. botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
  44. botrun_flow_lang/llm_agent/llm_agent.py +19 -19
  45. botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
  46. botrun_flow_lang/log/.gitignore +2 -2
  47. botrun_flow_lang/main.py +61 -61
  48. botrun_flow_lang/main_fast.py +51 -51
  49. botrun_flow_lang/mcp_server/__init__.py +10 -10
  50. botrun_flow_lang/mcp_server/default_mcp.py +744 -744
  51. botrun_flow_lang/models/nodes/utils.py +205 -205
  52. botrun_flow_lang/models/token_usage.py +34 -34
  53. botrun_flow_lang/requirements.txt +21 -21
  54. botrun_flow_lang/services/base/firestore_base.py +30 -30
  55. botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
  56. botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
  57. botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
  58. botrun_flow_lang/services/storage/storage_factory.py +12 -12
  59. botrun_flow_lang/services/storage/storage_store.py +65 -65
  60. botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
  61. botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
  62. botrun_flow_lang/static/docs/tools/index.html +926 -926
  63. botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
  64. botrun_flow_lang/tests/api_stress_test.py +357 -357
  65. botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
  66. botrun_flow_lang/tests/test_botrun_app.py +46 -46
  67. botrun_flow_lang/tests/test_html_util.py +31 -31
  68. botrun_flow_lang/tests/test_img_analyzer.py +190 -190
  69. botrun_flow_lang/tests/test_img_util.py +39 -39
  70. botrun_flow_lang/tests/test_local_files.py +114 -114
  71. botrun_flow_lang/tests/test_mermaid_util.py +103 -103
  72. botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
  73. botrun_flow_lang/tests/test_plotly_util.py +151 -151
  74. botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
  75. botrun_flow_lang/tools/generate_docs.py +133 -133
  76. botrun_flow_lang/tools/templates/tools.html +153 -153
  77. botrun_flow_lang/utils/__init__.py +7 -7
  78. botrun_flow_lang/utils/botrun_logger.py +344 -344
  79. botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
  80. botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
  81. botrun_flow_lang/utils/google_drive_utils.py +654 -654
  82. botrun_flow_lang/utils/langchain_utils.py +324 -324
  83. botrun_flow_lang/utils/yaml_utils.py +9 -9
  84. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/METADATA +1 -1
  85. botrun_flow_lang-5.12.264.dist-info/RECORD +102 -0
  86. botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
  87. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/WHEEL +0 -0
@@ -1,464 +1,464 @@
1
- from copy import deepcopy
2
- from typing import AsyncGenerator
3
- from pydantic import BaseModel
4
- import os
5
- import json
6
- import aiohttp
7
- from dotenv import load_dotenv
8
- import re
9
- import logging
10
-
11
- load_dotenv()
12
-
13
-
14
- class PerplexitySearchEvent(BaseModel):
15
- chunk: str
16
- raw_json: dict | None = None
17
-
18
-
19
- PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
20
- OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
21
-
22
-
23
- def should_include_citation(citation: str, domain_filter: list[str]) -> bool:
24
- # 如果沒有任何過濾規則,接受所有網站
25
- if not domain_filter:
26
- return True
27
-
28
- # 分離排除規則和包含規則
29
- exclude_rules = [
30
- rule[1:].replace("*.", "") for rule in domain_filter if rule.startswith("-")
31
- ]
32
- include_rules = [
33
- rule.replace("*.", "") for rule in domain_filter if not rule.startswith("-")
34
- ]
35
-
36
- # 檢查是否符合任何排除規則
37
- for pattern in exclude_rules:
38
- if pattern in citation:
39
- return False
40
-
41
- # 如果沒有包含規則,且通過了排除規則檢查,就接受該網站
42
- if not include_rules:
43
- return True
44
-
45
- # 如果有包含規則,必須符合至少一個
46
- for pattern in include_rules:
47
- if pattern in citation:
48
- return True
49
-
50
- return False
51
-
52
-
53
- def is_valid_domain(domain: str) -> bool:
54
- if not domain or "*." in domain:
55
- return False
56
-
57
- # 只允許包含 ://、.、% 和英數字的網址
58
- # ^ 表示開頭,$ 表示結尾
59
- # [a-zA-Z0-9] 表示英數字
60
- # [\\.\\:\\/\\%] 表示允許的特殊字符
61
- pattern = r"^[a-zA-Z0-9\\.\\:\\/\\%]+$"
62
-
63
- return bool(re.match(pattern, domain))
64
-
65
-
66
- async def respond_with_perplexity_search_openrouter(
67
- input_content,
68
- user_prompt_prefix,
69
- messages_for_llm,
70
- domain_filter: list[str],
71
- stream: bool = False,
72
- model: str = "perplexity/sonar-small-online",
73
- structured_output: bool = False,
74
- return_images: bool = False,
75
- ) -> AsyncGenerator[PerplexitySearchEvent, None]:
76
- """
77
- 使用 OpenRouter 提供的 Perplexity API 服務
78
- structured_output: 只有在 stream 為 False 時有效
79
- """
80
- # 確保模型是 Perplexity 的模型
81
- if not model.startswith("perplexity/"):
82
- model = "perplexity/sonar-small-online"
83
-
84
- api_key = os.getenv("OPENROUTER_API_KEY")
85
- if not api_key:
86
- raise ValueError("OPENROUTER_API_KEY environment variable not set")
87
-
88
- headers = {
89
- "Authorization": f"Bearer {api_key}",
90
- "Content-Type": "application/json",
91
- "HTTP-Referer": "https://openrouter.ai/api/v1", # OpenRouter 需要提供來源
92
- "X-Title": "BotRun Flow Lang", # 可選的應用名稱
93
- }
94
-
95
- messages = deepcopy(messages_for_llm)
96
- if len(messages) > 0 and messages[-1]["role"] == "user":
97
- messages.pop()
98
- if user_prompt_prefix:
99
- xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
100
- messages.append(
101
- {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
102
- )
103
- else:
104
- messages.append({"role": "user", "content": input_content})
105
-
106
- filtered_domain_filter = []
107
- for domain in domain_filter:
108
- if domain and is_valid_domain(domain):
109
- filtered_domain_filter.append(domain)
110
-
111
- payload = {
112
- "model": model,
113
- "messages": messages,
114
- "temperature": 0.5,
115
- "stream": stream,
116
- # OpenRouter 可能不支持 search_domain_filter 參數,如果有問題可以移除
117
- "search_domain_filter": filtered_domain_filter,
118
- "stream_usage": True,
119
- "return_images": return_images,
120
- # "reasoning_effort": "high",
121
- }
122
-
123
- try:
124
- input_token = 0
125
- output_token = 0
126
- async with aiohttp.ClientSession() as session:
127
- async with session.post(
128
- OPENROUTER_API_URL, headers=headers, json=payload
129
- ) as response:
130
- if response.status != 200:
131
- error_text = await response.text()
132
- raise ValueError(f"OpenRouter API error: {error_text}")
133
-
134
- if not stream:
135
- # 非串流模式的處理
136
- response_data = await response.json()
137
- content = response_data["choices"][0]["message"]["content"]
138
- content = remove_citation_number_from_content(content)
139
- if not structured_output:
140
- yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
141
-
142
- # 處理引用 (如果 OpenRouter 返回引用)
143
- citations = response_data.get("citations", [])
144
- final_citations = [
145
- citation
146
- for citation in citations
147
- if should_include_citation(citation, domain_filter)
148
- ]
149
- images = response_data.get("images", [])
150
-
151
- if final_citations:
152
- references = f"\n\n參考來源:\n"
153
- for citation in final_citations:
154
- references += f"- {citation}\n"
155
- if not structured_output:
156
- yield PerplexitySearchEvent(chunk=references)
157
-
158
- if structured_output:
159
- yield PerplexitySearchEvent(
160
- chunk=json.dumps(
161
- {
162
- "content": content,
163
- "citations": final_citations,
164
- "images": images,
165
- }
166
- ),
167
- raw_json=response_data,
168
- )
169
- else:
170
- # 串流模式的處理
171
- full_response = ""
172
- final_citations = []
173
- async for line in response.content:
174
- if line:
175
- line = line.decode("utf-8").strip()
176
- if line.startswith("data: "):
177
- line = line[6:] # Remove 'data: ' prefix
178
- if line == "[DONE]":
179
- break
180
-
181
- try:
182
- chunk_data = json.loads(line)
183
- response_data = chunk_data
184
-
185
- if (
186
- chunk_data["choices"][0]
187
- .get("delta", {})
188
- .get("content")
189
- ):
190
- content = chunk_data["choices"][0]["delta"][
191
- "content"
192
- ]
193
- full_response += content
194
- yield PerplexitySearchEvent(
195
- chunk=content,
196
- raw_json=chunk_data,
197
- )
198
- if not final_citations and chunk_data.get(
199
- "citations", []
200
- ):
201
- citations = chunk_data.get("citations", [])
202
- final_citations = [
203
- citation
204
- for citation in citations
205
- if should_include_citation(
206
- citation, domain_filter
207
- )
208
- ]
209
-
210
- except json.JSONDecodeError:
211
- continue
212
-
213
- # 只在有符合條件的 citations 時才產生參考文獻
214
- if final_citations:
215
- references = f"\n\n參考來源:\n"
216
- for citation in final_citations:
217
- references += f"- {citation}\n"
218
- yield PerplexitySearchEvent(chunk=references)
219
-
220
- if response_data.get("usage"):
221
- logging.info(
222
- f"perplexity_search_openrouter============> input_token: {response_data['usage'].get('prompt_tokens', 0) + response_data['usage'].get('citation_tokens', 0)}, output_token: {response_data['usage'].get('completion_tokens', 0)}",
223
- )
224
- except Exception as e:
225
- import traceback
226
-
227
- traceback.print_exc()
228
- print(e)
229
-
230
-
231
- async def respond_with_perplexity_search(
232
- input_content,
233
- user_prompt_prefix,
234
- messages_for_llm,
235
- domain_filter: list[str],
236
- stream: bool = False,
237
- model: str = "sonar-reasoning-pro",
238
- structured_output: bool = False,
239
- return_images: bool = False,
240
- ) -> AsyncGenerator[PerplexitySearchEvent, None]:
241
- """
242
- structured_output: 只有在 stream 為 False 時有效
243
- return_images: 是否返回圖片,但是 openrouter 不支援
244
- """
245
- # 檢查是否使用 OpenRouter
246
- is_use_openrouter = os.getenv("OPENROUTER_API_KEY") and os.getenv(
247
- "OPENROUTER_BASE_URL"
248
- )
249
- if return_images:
250
- # if os.getenv("PPLX_API_KEY", "") == "":
251
- # raise ValueError(
252
- # "PPLX_API_KEY environment variable not set, return_images needs PPLX_API_KEY"
253
- # )
254
- # Openrouter 尚不支援 return_images
255
- is_use_openrouter = False
256
-
257
- if is_use_openrouter:
258
- # 若使用 OpenRouter,轉換模型名稱並呼叫 OpenRouter 版本的函數
259
- openrouter_model = "perplexity/sonar-reasoning-pro"
260
- if model == "sonar-reasoning-pro":
261
- openrouter_model = "perplexity/sonar-reasoning-pro"
262
- elif model == "sonar-reasoning":
263
- openrouter_model = "perplexity/sonar-reasoning"
264
- elif model == "sonar-pro":
265
- openrouter_model = "perplexity/sonar-pro"
266
- elif model == "sonar":
267
- openrouter_model = "perplexity/sonar"
268
-
269
- async for event in respond_with_perplexity_search_openrouter(
270
- input_content,
271
- user_prompt_prefix,
272
- messages_for_llm,
273
- domain_filter,
274
- stream,
275
- openrouter_model,
276
- structured_output,
277
- ):
278
- yield event
279
- return
280
-
281
- # 以下是原有的邏輯
282
- if model not in ["sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar"]:
283
- model = "sonar-reasoning-pro"
284
- api_key = os.getenv("PPLX_API_KEY")
285
- if not api_key:
286
- raise ValueError("PPLX_API_KEY environment variable not set")
287
-
288
- headers = {
289
- "Authorization": f"Bearer {api_key}",
290
- "Content-Type": "application/json",
291
- }
292
- messages = deepcopy(messages_for_llm)
293
- if len(messages) > 0 and messages[-1]["role"] == "user":
294
- messages.pop()
295
- if user_prompt_prefix:
296
- xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
297
- messages.append(
298
- {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
299
- )
300
- else:
301
- messages.append({"role": "user", "content": input_content})
302
- filtered_domain_filter = []
303
-
304
- for domain in domain_filter:
305
- if domain and is_valid_domain(domain):
306
- filtered_domain_filter.append(domain)
307
-
308
- payload = {
309
- "model": model,
310
- "messages": messages,
311
- "temperature": 0.5,
312
- "stream": stream,
313
- "search_domain_filter": filtered_domain_filter,
314
- "stream_usage": True,
315
- "return_images": return_images,
316
-
317
- }
318
- try:
319
- input_token = 0
320
- output_token = 0
321
- async with aiohttp.ClientSession() as session:
322
- async with session.post(
323
- PERPLEXITY_API_URL, headers=headers, json=payload
324
- ) as response:
325
- if response.status != 200:
326
- error_text = await response.text()
327
- raise ValueError(f"Perplexity API error: {error_text}")
328
-
329
- if not stream:
330
- # 非串流模式的處理
331
- response_data = await response.json()
332
- content = response_data["choices"][0]["message"]["content"]
333
- content = remove_citation_number_from_content(content)
334
- if not structured_output:
335
- yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
336
-
337
- # 處理引用
338
- citations = response_data.get("citations", [])
339
- final_citations = [
340
- citation
341
- for citation in citations
342
- if should_include_citation(citation, domain_filter)
343
- ]
344
- images = response_data.get("images", [])
345
-
346
- if final_citations:
347
- references = f"\n\n參考來源:\n"
348
- for citation in final_citations:
349
- references += f"- {citation}\n"
350
- if not structured_output:
351
- yield PerplexitySearchEvent(chunk=references)
352
-
353
- if structured_output:
354
- yield PerplexitySearchEvent(
355
- chunk=json.dumps(
356
- {
357
- "content": content,
358
- "citations": final_citations,
359
- "images": images,
360
- }
361
- ),
362
- raw_json=response_data,
363
- )
364
-
365
- # 串流模式的處理
366
- full_response = ""
367
- final_citations = []
368
- async for line in response.content:
369
- if line:
370
- line = line.decode("utf-8").strip()
371
- if line.startswith("data: "):
372
- line = line[6:] # Remove 'data: ' prefix
373
- if line == "[DONE]":
374
- break
375
-
376
- try:
377
- chunk_data = json.loads(line)
378
- response_data = chunk_data
379
- # print(chunk_data)
380
- if (
381
- chunk_data["choices"][0]
382
- .get("delta", {})
383
- .get("content")
384
- ):
385
- content = chunk_data["choices"][0]["delta"][
386
- "content"
387
- ]
388
- full_response += content
389
- yield PerplexitySearchEvent(
390
- chunk=content,
391
- raw_json=chunk_data,
392
- )
393
- if not final_citations and chunk_data.get(
394
- "citations", []
395
- ):
396
- # 發現 perplexity 不會都有 finish_reason 為 stop 的狀況,但是 citations 會有
397
- # if chunk_data["choices"][0]["finish_reason"] == "stop":
398
- citations = chunk_data.get("citations", [])
399
- final_citations = [
400
- citation
401
- for citation in citations
402
- if should_include_citation(
403
- citation, domain_filter
404
- )
405
- ]
406
-
407
- except json.JSONDecodeError:
408
- continue
409
-
410
- # 只在有符合條件的 citations 時才產生參考文獻
411
- if final_citations:
412
- references = f"\n\n參考來源:\n"
413
- for citation in final_citations:
414
- references += f"- {citation}\n"
415
- yield PerplexitySearchEvent(chunk=references)
416
- # 安全地存取 usage 資訊,避免鍵不存在的錯誤
417
- if response_data and "usage" in response_data:
418
- usage = response_data["usage"]
419
- prompt_tokens = usage.get("prompt_tokens", 0)
420
- citation_tokens = usage.get("citation_tokens", 0)
421
- completion_tokens = usage.get("completion_tokens", 0)
422
- logging.info(
423
- f"perplexity_search============> input_token: {prompt_tokens + citation_tokens}, output_token: {completion_tokens}",
424
- )
425
- else:
426
- logging.info("perplexity_search============> usage information not available")
427
- except Exception as e:
428
- import traceback
429
-
430
- traceback.print_exc()
431
- print(e)
432
-
433
-
434
- def remove_citation_number_from_content(content: str) -> str:
435
- """
436
- 移除文字裡的 [1]、[2]、[3] 等數字
437
- """
438
- return re.sub(r"\[[0-9]+\]", "", content)
439
- # answer_message = await cl.Message(content="").send()
440
- # full_response = ""
441
- # for response in responses:
442
- # if response.candidates[0].finish_reason != Candidate.FinishReason.STOP:
443
- # # await answer_message.stream_token(response.text)
444
- # yield GeminiGroundingEvent(chunk=response.text)
445
- # full_response += response.text
446
- # if response.candidates[0].grounding_metadata:
447
- # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
448
- # references = f"\n\n{tr('Sources:')}\n"
449
- # for grounding_chunk in response.candidates[
450
- # 0
451
- # ].grounding_metadata.grounding_chunks:
452
- # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
453
- # # await answer_message.stream_token(references)
454
- # yield GeminiGroundingEvent(chunk=references)
455
- # else:
456
- # if response.candidates[0].grounding_metadata:
457
- # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
458
- # references = f"\n\n{tr('Sources:')}\n"
459
- # for grounding_chunk in response.candidates[
460
- # 0
461
- # ].grounding_metadata.grounding_chunks:
462
- # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
463
- # # await answer_message.stream_token(references)
464
- # yield GeminiGroundingEvent(chunk=references)
1
+ from copy import deepcopy
2
+ from typing import AsyncGenerator
3
+ from pydantic import BaseModel
4
+ import os
5
+ import json
6
+ import aiohttp
7
+ from dotenv import load_dotenv
8
+ import re
9
+ import logging
10
+
11
+ load_dotenv()
12
+
13
+
14
+ class PerplexitySearchEvent(BaseModel):
15
+ chunk: str
16
+ raw_json: dict | None = None
17
+
18
+
19
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
20
+ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
21
+
22
+
23
+ def should_include_citation(citation: str, domain_filter: list[str]) -> bool:
24
+ # 如果沒有任何過濾規則,接受所有網站
25
+ if not domain_filter:
26
+ return True
27
+
28
+ # 分離排除規則和包含規則
29
+ exclude_rules = [
30
+ rule[1:].replace("*.", "") for rule in domain_filter if rule.startswith("-")
31
+ ]
32
+ include_rules = [
33
+ rule.replace("*.", "") for rule in domain_filter if not rule.startswith("-")
34
+ ]
35
+
36
+ # 檢查是否符合任何排除規則
37
+ for pattern in exclude_rules:
38
+ if pattern in citation:
39
+ return False
40
+
41
+ # 如果沒有包含規則,且通過了排除規則檢查,就接受該網站
42
+ if not include_rules:
43
+ return True
44
+
45
+ # 如果有包含規則,必須符合至少一個
46
+ for pattern in include_rules:
47
+ if pattern in citation:
48
+ return True
49
+
50
+ return False
51
+
52
+
53
+ def is_valid_domain(domain: str) -> bool:
54
+ if not domain or "*." in domain:
55
+ return False
56
+
57
+ # 只允許包含 ://、.、% 和英數字的網址
58
+ # ^ 表示開頭,$ 表示結尾
59
+ # [a-zA-Z0-9] 表示英數字
60
+ # [\\.\\:\\/\\%] 表示允許的特殊字符
61
+ pattern = r"^[a-zA-Z0-9\\.\\:\\/\\%]+$"
62
+
63
+ return bool(re.match(pattern, domain))
64
+
65
+
66
+ async def respond_with_perplexity_search_openrouter(
67
+ input_content,
68
+ user_prompt_prefix,
69
+ messages_for_llm,
70
+ domain_filter: list[str],
71
+ stream: bool = False,
72
+ model: str = "perplexity/sonar-small-online",
73
+ structured_output: bool = False,
74
+ return_images: bool = False,
75
+ ) -> AsyncGenerator[PerplexitySearchEvent, None]:
76
+ """
77
+ 使用 OpenRouter 提供的 Perplexity API 服務
78
+ structured_output: 只有在 stream 為 False 時有效
79
+ """
80
+ # 確保模型是 Perplexity 的模型
81
+ if not model.startswith("perplexity/"):
82
+ model = "perplexity/sonar-small-online"
83
+
84
+ api_key = os.getenv("OPENROUTER_API_KEY")
85
+ if not api_key:
86
+ raise ValueError("OPENROUTER_API_KEY environment variable not set")
87
+
88
+ headers = {
89
+ "Authorization": f"Bearer {api_key}",
90
+ "Content-Type": "application/json",
91
+ "HTTP-Referer": "https://openrouter.ai/api/v1", # OpenRouter 需要提供來源
92
+ "X-Title": "BotRun Flow Lang", # 可選的應用名稱
93
+ }
94
+
95
+ messages = deepcopy(messages_for_llm)
96
+ if len(messages) > 0 and messages[-1]["role"] == "user":
97
+ messages.pop()
98
+ if user_prompt_prefix:
99
+ xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
100
+ messages.append(
101
+ {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
102
+ )
103
+ else:
104
+ messages.append({"role": "user", "content": input_content})
105
+
106
+ filtered_domain_filter = []
107
+ for domain in domain_filter:
108
+ if domain and is_valid_domain(domain):
109
+ filtered_domain_filter.append(domain)
110
+
111
+ payload = {
112
+ "model": model,
113
+ "messages": messages,
114
+ "temperature": 0.5,
115
+ "stream": stream,
116
+ # OpenRouter 可能不支持 search_domain_filter 參數,如果有問題可以移除
117
+ "search_domain_filter": filtered_domain_filter,
118
+ "stream_usage": True,
119
+ "return_images": return_images,
120
+ # "reasoning_effort": "high",
121
+ }
122
+
123
+ try:
124
+ input_token = 0
125
+ output_token = 0
126
+ async with aiohttp.ClientSession() as session:
127
+ async with session.post(
128
+ OPENROUTER_API_URL, headers=headers, json=payload
129
+ ) as response:
130
+ if response.status != 200:
131
+ error_text = await response.text()
132
+ raise ValueError(f"OpenRouter API error: {error_text}")
133
+
134
+ if not stream:
135
+ # 非串流模式的處理
136
+ response_data = await response.json()
137
+ content = response_data["choices"][0]["message"]["content"]
138
+ content = remove_citation_number_from_content(content)
139
+ if not structured_output:
140
+ yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
141
+
142
+ # 處理引用 (如果 OpenRouter 返回引用)
143
+ citations = response_data.get("citations", [])
144
+ final_citations = [
145
+ citation
146
+ for citation in citations
147
+ if should_include_citation(citation, domain_filter)
148
+ ]
149
+ images = response_data.get("images", [])
150
+
151
+ if final_citations:
152
+ references = f"\n\n參考來源:\n"
153
+ for citation in final_citations:
154
+ references += f"- {citation}\n"
155
+ if not structured_output:
156
+ yield PerplexitySearchEvent(chunk=references)
157
+
158
+ if structured_output:
159
+ yield PerplexitySearchEvent(
160
+ chunk=json.dumps(
161
+ {
162
+ "content": content,
163
+ "citations": final_citations,
164
+ "images": images,
165
+ }
166
+ ),
167
+ raw_json=response_data,
168
+ )
169
+ else:
170
+ # 串流模式的處理
171
+ full_response = ""
172
+ final_citations = []
173
+ async for line in response.content:
174
+ if line:
175
+ line = line.decode("utf-8").strip()
176
+ if line.startswith("data: "):
177
+ line = line[6:] # Remove 'data: ' prefix
178
+ if line == "[DONE]":
179
+ break
180
+
181
+ try:
182
+ chunk_data = json.loads(line)
183
+ response_data = chunk_data
184
+
185
+ if (
186
+ chunk_data["choices"][0]
187
+ .get("delta", {})
188
+ .get("content")
189
+ ):
190
+ content = chunk_data["choices"][0]["delta"][
191
+ "content"
192
+ ]
193
+ full_response += content
194
+ yield PerplexitySearchEvent(
195
+ chunk=content,
196
+ raw_json=chunk_data,
197
+ )
198
+ if not final_citations and chunk_data.get(
199
+ "citations", []
200
+ ):
201
+ citations = chunk_data.get("citations", [])
202
+ final_citations = [
203
+ citation
204
+ for citation in citations
205
+ if should_include_citation(
206
+ citation, domain_filter
207
+ )
208
+ ]
209
+
210
+ except json.JSONDecodeError:
211
+ continue
212
+
213
+ # 只在有符合條件的 citations 時才產生參考文獻
214
+ if final_citations:
215
+ references = f"\n\n參考來源:\n"
216
+ for citation in final_citations:
217
+ references += f"- {citation}\n"
218
+ yield PerplexitySearchEvent(chunk=references)
219
+
220
+ if response_data.get("usage"):
221
+ logging.info(
222
+ f"perplexity_search_openrouter============> input_token: {response_data['usage'].get('prompt_tokens', 0) + response_data['usage'].get('citation_tokens', 0)}, output_token: {response_data['usage'].get('completion_tokens', 0)}",
223
+ )
224
+ except Exception as e:
225
+ import traceback
226
+
227
+ traceback.print_exc()
228
+ print(e)
229
+
230
+
231
+ async def respond_with_perplexity_search(
232
+ input_content,
233
+ user_prompt_prefix,
234
+ messages_for_llm,
235
+ domain_filter: list[str],
236
+ stream: bool = False,
237
+ model: str = "sonar-reasoning-pro",
238
+ structured_output: bool = False,
239
+ return_images: bool = False,
240
+ ) -> AsyncGenerator[PerplexitySearchEvent, None]:
241
+ """
242
+ structured_output: 只有在 stream 為 False 時有效
243
+ return_images: 是否返回圖片,但是 openrouter 不支援
244
+ """
245
+ # 檢查是否使用 OpenRouter
246
+ is_use_openrouter = os.getenv("OPENROUTER_API_KEY") and os.getenv(
247
+ "OPENROUTER_BASE_URL"
248
+ )
249
+ if return_images:
250
+ # if os.getenv("PPLX_API_KEY", "") == "":
251
+ # raise ValueError(
252
+ # "PPLX_API_KEY environment variable not set, return_images needs PPLX_API_KEY"
253
+ # )
254
+ # Openrouter 尚不支援 return_images
255
+ is_use_openrouter = False
256
+
257
+ if is_use_openrouter:
258
+ # 若使用 OpenRouter,轉換模型名稱並呼叫 OpenRouter 版本的函數
259
+ openrouter_model = "perplexity/sonar-reasoning-pro"
260
+ if model == "sonar-reasoning-pro":
261
+ openrouter_model = "perplexity/sonar-reasoning-pro"
262
+ elif model == "sonar-reasoning":
263
+ openrouter_model = "perplexity/sonar-reasoning"
264
+ elif model == "sonar-pro":
265
+ openrouter_model = "perplexity/sonar-pro"
266
+ elif model == "sonar":
267
+ openrouter_model = "perplexity/sonar"
268
+
269
+ async for event in respond_with_perplexity_search_openrouter(
270
+ input_content,
271
+ user_prompt_prefix,
272
+ messages_for_llm,
273
+ domain_filter,
274
+ stream,
275
+ openrouter_model,
276
+ structured_output,
277
+ ):
278
+ yield event
279
+ return
280
+
281
+ # 以下是原有的邏輯
282
+ if model not in ["sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar"]:
283
+ model = "sonar-reasoning-pro"
284
+ api_key = os.getenv("PPLX_API_KEY")
285
+ if not api_key:
286
+ raise ValueError("PPLX_API_KEY environment variable not set")
287
+
288
+ headers = {
289
+ "Authorization": f"Bearer {api_key}",
290
+ "Content-Type": "application/json",
291
+ }
292
+ messages = deepcopy(messages_for_llm)
293
+ if len(messages) > 0 and messages[-1]["role"] == "user":
294
+ messages.pop()
295
+ if user_prompt_prefix:
296
+ xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
297
+ messages.append(
298
+ {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
299
+ )
300
+ else:
301
+ messages.append({"role": "user", "content": input_content})
302
+ filtered_domain_filter = []
303
+
304
+ for domain in domain_filter:
305
+ if domain and is_valid_domain(domain):
306
+ filtered_domain_filter.append(domain)
307
+
308
+ payload = {
309
+ "model": model,
310
+ "messages": messages,
311
+ "temperature": 0.5,
312
+ "stream": stream,
313
+ "search_domain_filter": filtered_domain_filter,
314
+ "stream_usage": True,
315
+ "return_images": return_images,
316
+
317
+ }
318
+ try:
319
+ input_token = 0
320
+ output_token = 0
321
+ async with aiohttp.ClientSession() as session:
322
+ async with session.post(
323
+ PERPLEXITY_API_URL, headers=headers, json=payload
324
+ ) as response:
325
+ if response.status != 200:
326
+ error_text = await response.text()
327
+ raise ValueError(f"Perplexity API error: {error_text}")
328
+
329
+ if not stream:
330
+ # 非串流模式的處理
331
+ response_data = await response.json()
332
+ content = response_data["choices"][0]["message"]["content"]
333
+ content = remove_citation_number_from_content(content)
334
+ if not structured_output:
335
+ yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
336
+
337
+ # 處理引用
338
+ citations = response_data.get("citations", [])
339
+ final_citations = [
340
+ citation
341
+ for citation in citations
342
+ if should_include_citation(citation, domain_filter)
343
+ ]
344
+ images = response_data.get("images", [])
345
+
346
+ if final_citations:
347
+ references = f"\n\n參考來源:\n"
348
+ for citation in final_citations:
349
+ references += f"- {citation}\n"
350
+ if not structured_output:
351
+ yield PerplexitySearchEvent(chunk=references)
352
+
353
+ if structured_output:
354
+ yield PerplexitySearchEvent(
355
+ chunk=json.dumps(
356
+ {
357
+ "content": content,
358
+ "citations": final_citations,
359
+ "images": images,
360
+ }
361
+ ),
362
+ raw_json=response_data,
363
+ )
364
+
365
+ # 串流模式的處理
366
+ full_response = ""
367
+ final_citations = []
368
+ async for line in response.content:
369
+ if line:
370
+ line = line.decode("utf-8").strip()
371
+ if line.startswith("data: "):
372
+ line = line[6:] # Remove 'data: ' prefix
373
+ if line == "[DONE]":
374
+ break
375
+
376
+ try:
377
+ chunk_data = json.loads(line)
378
+ response_data = chunk_data
379
+ # print(chunk_data)
380
+ if (
381
+ chunk_data["choices"][0]
382
+ .get("delta", {})
383
+ .get("content")
384
+ ):
385
+ content = chunk_data["choices"][0]["delta"][
386
+ "content"
387
+ ]
388
+ full_response += content
389
+ yield PerplexitySearchEvent(
390
+ chunk=content,
391
+ raw_json=chunk_data,
392
+ )
393
+ if not final_citations and chunk_data.get(
394
+ "citations", []
395
+ ):
396
+ # 發現 perplexity 不會都有 finish_reason 為 stop 的狀況,但是 citations 會有
397
+ # if chunk_data["choices"][0]["finish_reason"] == "stop":
398
+ citations = chunk_data.get("citations", [])
399
+ final_citations = [
400
+ citation
401
+ for citation in citations
402
+ if should_include_citation(
403
+ citation, domain_filter
404
+ )
405
+ ]
406
+
407
+ except json.JSONDecodeError:
408
+ continue
409
+
410
+ # 只在有符合條件的 citations 時才產生參考文獻
411
+ if final_citations:
412
+ references = f"\n\n參考來源:\n"
413
+ for citation in final_citations:
414
+ references += f"- {citation}\n"
415
+ yield PerplexitySearchEvent(chunk=references)
416
+ # 安全地存取 usage 資訊,避免鍵不存在的錯誤
417
+ if response_data and "usage" in response_data:
418
+ usage = response_data["usage"]
419
+ prompt_tokens = usage.get("prompt_tokens", 0)
420
+ citation_tokens = usage.get("citation_tokens", 0)
421
+ completion_tokens = usage.get("completion_tokens", 0)
422
+ logging.info(
423
+ f"perplexity_search============> input_token: {prompt_tokens + citation_tokens}, output_token: {completion_tokens}",
424
+ )
425
+ else:
426
+ logging.info("perplexity_search============> usage information not available")
427
+ except Exception as e:
428
+ import traceback
429
+
430
+ traceback.print_exc()
431
+ print(e)
432
+
433
+
434
+ def remove_citation_number_from_content(content: str) -> str:
435
+ """
436
+ 移除文字裡的 [1]、[2]、[3] 等數字
437
+ """
438
+ return re.sub(r"\[[0-9]+\]", "", content)
439
+ # answer_message = await cl.Message(content="").send()
440
+ # full_response = ""
441
+ # for response in responses:
442
+ # if response.candidates[0].finish_reason != Candidate.FinishReason.STOP:
443
+ # # await answer_message.stream_token(response.text)
444
+ # yield GeminiGroundingEvent(chunk=response.text)
445
+ # full_response += response.text
446
+ # if response.candidates[0].grounding_metadata:
447
+ # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
448
+ # references = f"\n\n{tr('Sources:')}\n"
449
+ # for grounding_chunk in response.candidates[
450
+ # 0
451
+ # ].grounding_metadata.grounding_chunks:
452
+ # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
453
+ # # await answer_message.stream_token(references)
454
+ # yield GeminiGroundingEvent(chunk=references)
455
+ # else:
456
+ # if response.candidates[0].grounding_metadata:
457
+ # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
458
+ # references = f"\n\n{tr('Sources:')}\n"
459
+ # for grounding_chunk in response.candidates[
460
+ # 0
461
+ # ].grounding_metadata.grounding_chunks:
462
+ # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
463
+ # # await answer_message.stream_token(references)
464
+ # yield GeminiGroundingEvent(chunk=references)