botrun-flow-lang 5.12.263__py3-none-any.whl → 6.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. botrun_flow_lang/api/auth_api.py +39 -39
  2. botrun_flow_lang/api/auth_utils.py +183 -183
  3. botrun_flow_lang/api/botrun_back_api.py +65 -65
  4. botrun_flow_lang/api/flow_api.py +3 -3
  5. botrun_flow_lang/api/hatch_api.py +508 -508
  6. botrun_flow_lang/api/langgraph_api.py +816 -811
  7. botrun_flow_lang/api/langgraph_constants.py +11 -0
  8. botrun_flow_lang/api/line_bot_api.py +1484 -1484
  9. botrun_flow_lang/api/model_api.py +300 -300
  10. botrun_flow_lang/api/rate_limit_api.py +32 -32
  11. botrun_flow_lang/api/routes.py +79 -79
  12. botrun_flow_lang/api/search_api.py +53 -53
  13. botrun_flow_lang/api/storage_api.py +395 -395
  14. botrun_flow_lang/api/subsidy_api.py +290 -290
  15. botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
  16. botrun_flow_lang/api/user_setting_api.py +70 -70
  17. botrun_flow_lang/api/version_api.py +31 -31
  18. botrun_flow_lang/api/youtube_api.py +26 -26
  19. botrun_flow_lang/constants.py +13 -13
  20. botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
  21. botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
  22. botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
  23. botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
  24. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
  25. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
  26. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
  27. botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +730 -723
  28. botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
  29. botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
  30. botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
  31. botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
  32. botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
  33. botrun_flow_lang/langgraph_agents/agents/util/img_util.py +336 -294
  34. botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
  35. botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
  36. botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
  37. botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +562 -486
  38. botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
  39. botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
  40. botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
  41. botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
  42. botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
  43. botrun_flow_lang/langgraph_agents/agents/util/usage_metadata.py +34 -0
  44. botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
  45. botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
  46. botrun_flow_lang/llm_agent/llm_agent.py +19 -19
  47. botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
  48. botrun_flow_lang/log/.gitignore +2 -2
  49. botrun_flow_lang/main.py +61 -61
  50. botrun_flow_lang/main_fast.py +51 -51
  51. botrun_flow_lang/mcp_server/__init__.py +10 -10
  52. botrun_flow_lang/mcp_server/default_mcp.py +854 -744
  53. botrun_flow_lang/models/nodes/utils.py +205 -205
  54. botrun_flow_lang/models/token_usage.py +34 -34
  55. botrun_flow_lang/requirements.txt +21 -21
  56. botrun_flow_lang/services/base/firestore_base.py +30 -30
  57. botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
  58. botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
  59. botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
  60. botrun_flow_lang/services/storage/storage_factory.py +12 -12
  61. botrun_flow_lang/services/storage/storage_store.py +65 -65
  62. botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
  63. botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
  64. botrun_flow_lang/static/docs/tools/index.html +926 -926
  65. botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
  66. botrun_flow_lang/tests/api_stress_test.py +357 -357
  67. botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
  68. botrun_flow_lang/tests/test_botrun_app.py +46 -46
  69. botrun_flow_lang/tests/test_html_util.py +31 -31
  70. botrun_flow_lang/tests/test_img_analyzer.py +190 -190
  71. botrun_flow_lang/tests/test_img_util.py +39 -39
  72. botrun_flow_lang/tests/test_local_files.py +114 -114
  73. botrun_flow_lang/tests/test_mermaid_util.py +103 -103
  74. botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
  75. botrun_flow_lang/tests/test_plotly_util.py +151 -151
  76. botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
  77. botrun_flow_lang/tools/generate_docs.py +133 -133
  78. botrun_flow_lang/tools/templates/tools.html +153 -153
  79. botrun_flow_lang/utils/__init__.py +7 -7
  80. botrun_flow_lang/utils/botrun_logger.py +344 -344
  81. botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
  82. botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
  83. botrun_flow_lang/utils/google_drive_utils.py +654 -654
  84. botrun_flow_lang/utils/langchain_utils.py +324 -324
  85. botrun_flow_lang/utils/yaml_utils.py +9 -9
  86. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-6.2.21.dist-info}/METADATA +6 -6
  87. botrun_flow_lang-6.2.21.dist-info/RECORD +104 -0
  88. botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
  89. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-6.2.21.dist-info}/WHEEL +0 -0
@@ -1,464 +1,464 @@
1
- from copy import deepcopy
2
- from typing import AsyncGenerator
3
- from pydantic import BaseModel
4
- import os
5
- import json
6
- import aiohttp
7
- from dotenv import load_dotenv
8
- import re
9
- import logging
10
-
11
- load_dotenv()
12
-
13
-
14
- class PerplexitySearchEvent(BaseModel):
15
- chunk: str
16
- raw_json: dict | None = None
17
-
18
-
19
- PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
20
- OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
21
-
22
-
23
- def should_include_citation(citation: str, domain_filter: list[str]) -> bool:
24
- # 如果沒有任何過濾規則,接受所有網站
25
- if not domain_filter:
26
- return True
27
-
28
- # 分離排除規則和包含規則
29
- exclude_rules = [
30
- rule[1:].replace("*.", "") for rule in domain_filter if rule.startswith("-")
31
- ]
32
- include_rules = [
33
- rule.replace("*.", "") for rule in domain_filter if not rule.startswith("-")
34
- ]
35
-
36
- # 檢查是否符合任何排除規則
37
- for pattern in exclude_rules:
38
- if pattern in citation:
39
- return False
40
-
41
- # 如果沒有包含規則,且通過了排除規則檢查,就接受該網站
42
- if not include_rules:
43
- return True
44
-
45
- # 如果有包含規則,必須符合至少一個
46
- for pattern in include_rules:
47
- if pattern in citation:
48
- return True
49
-
50
- return False
51
-
52
-
53
- def is_valid_domain(domain: str) -> bool:
54
- if not domain or "*." in domain:
55
- return False
56
-
57
- # 只允許包含 ://、.、% 和英數字的網址
58
- # ^ 表示開頭,$ 表示結尾
59
- # [a-zA-Z0-9] 表示英數字
60
- # [\\.\\:\\/\\%] 表示允許的特殊字符
61
- pattern = r"^[a-zA-Z0-9\\.\\:\\/\\%]+$"
62
-
63
- return bool(re.match(pattern, domain))
64
-
65
-
66
- async def respond_with_perplexity_search_openrouter(
67
- input_content,
68
- user_prompt_prefix,
69
- messages_for_llm,
70
- domain_filter: list[str],
71
- stream: bool = False,
72
- model: str = "perplexity/sonar-small-online",
73
- structured_output: bool = False,
74
- return_images: bool = False,
75
- ) -> AsyncGenerator[PerplexitySearchEvent, None]:
76
- """
77
- 使用 OpenRouter 提供的 Perplexity API 服務
78
- structured_output: 只有在 stream 為 False 時有效
79
- """
80
- # 確保模型是 Perplexity 的模型
81
- if not model.startswith("perplexity/"):
82
- model = "perplexity/sonar-small-online"
83
-
84
- api_key = os.getenv("OPENROUTER_API_KEY")
85
- if not api_key:
86
- raise ValueError("OPENROUTER_API_KEY environment variable not set")
87
-
88
- headers = {
89
- "Authorization": f"Bearer {api_key}",
90
- "Content-Type": "application/json",
91
- "HTTP-Referer": "https://openrouter.ai/api/v1", # OpenRouter 需要提供來源
92
- "X-Title": "BotRun Flow Lang", # 可選的應用名稱
93
- }
94
-
95
- messages = deepcopy(messages_for_llm)
96
- if len(messages) > 0 and messages[-1]["role"] == "user":
97
- messages.pop()
98
- if user_prompt_prefix:
99
- xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
100
- messages.append(
101
- {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
102
- )
103
- else:
104
- messages.append({"role": "user", "content": input_content})
105
-
106
- filtered_domain_filter = []
107
- for domain in domain_filter:
108
- if domain and is_valid_domain(domain):
109
- filtered_domain_filter.append(domain)
110
-
111
- payload = {
112
- "model": model,
113
- "messages": messages,
114
- "temperature": 0.5,
115
- "stream": stream,
116
- # OpenRouter 可能不支持 search_domain_filter 參數,如果有問題可以移除
117
- "search_domain_filter": filtered_domain_filter,
118
- "stream_usage": True,
119
- "return_images": return_images,
120
- # "reasoning_effort": "high",
121
- }
122
-
123
- try:
124
- input_token = 0
125
- output_token = 0
126
- async with aiohttp.ClientSession() as session:
127
- async with session.post(
128
- OPENROUTER_API_URL, headers=headers, json=payload
129
- ) as response:
130
- if response.status != 200:
131
- error_text = await response.text()
132
- raise ValueError(f"OpenRouter API error: {error_text}")
133
-
134
- if not stream:
135
- # 非串流模式的處理
136
- response_data = await response.json()
137
- content = response_data["choices"][0]["message"]["content"]
138
- content = remove_citation_number_from_content(content)
139
- if not structured_output:
140
- yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
141
-
142
- # 處理引用 (如果 OpenRouter 返回引用)
143
- citations = response_data.get("citations", [])
144
- final_citations = [
145
- citation
146
- for citation in citations
147
- if should_include_citation(citation, domain_filter)
148
- ]
149
- images = response_data.get("images", [])
150
-
151
- if final_citations:
152
- references = f"\n\n參考來源:\n"
153
- for citation in final_citations:
154
- references += f"- {citation}\n"
155
- if not structured_output:
156
- yield PerplexitySearchEvent(chunk=references)
157
-
158
- if structured_output:
159
- yield PerplexitySearchEvent(
160
- chunk=json.dumps(
161
- {
162
- "content": content,
163
- "citations": final_citations,
164
- "images": images,
165
- }
166
- ),
167
- raw_json=response_data,
168
- )
169
- else:
170
- # 串流模式的處理
171
- full_response = ""
172
- final_citations = []
173
- async for line in response.content:
174
- if line:
175
- line = line.decode("utf-8").strip()
176
- if line.startswith("data: "):
177
- line = line[6:] # Remove 'data: ' prefix
178
- if line == "[DONE]":
179
- break
180
-
181
- try:
182
- chunk_data = json.loads(line)
183
- response_data = chunk_data
184
-
185
- if (
186
- chunk_data["choices"][0]
187
- .get("delta", {})
188
- .get("content")
189
- ):
190
- content = chunk_data["choices"][0]["delta"][
191
- "content"
192
- ]
193
- full_response += content
194
- yield PerplexitySearchEvent(
195
- chunk=content,
196
- raw_json=chunk_data,
197
- )
198
- if not final_citations and chunk_data.get(
199
- "citations", []
200
- ):
201
- citations = chunk_data.get("citations", [])
202
- final_citations = [
203
- citation
204
- for citation in citations
205
- if should_include_citation(
206
- citation, domain_filter
207
- )
208
- ]
209
-
210
- except json.JSONDecodeError:
211
- continue
212
-
213
- # 只在有符合條件的 citations 時才產生參考文獻
214
- if final_citations:
215
- references = f"\n\n參考來源:\n"
216
- for citation in final_citations:
217
- references += f"- {citation}\n"
218
- yield PerplexitySearchEvent(chunk=references)
219
-
220
- if response_data.get("usage"):
221
- logging.info(
222
- f"perplexity_search_openrouter============> input_token: {response_data['usage'].get('prompt_tokens', 0) + response_data['usage'].get('citation_tokens', 0)}, output_token: {response_data['usage'].get('completion_tokens', 0)}",
223
- )
224
- except Exception as e:
225
- import traceback
226
-
227
- traceback.print_exc()
228
- print(e)
229
-
230
-
231
- async def respond_with_perplexity_search(
232
- input_content,
233
- user_prompt_prefix,
234
- messages_for_llm,
235
- domain_filter: list[str],
236
- stream: bool = False,
237
- model: str = "sonar-reasoning-pro",
238
- structured_output: bool = False,
239
- return_images: bool = False,
240
- ) -> AsyncGenerator[PerplexitySearchEvent, None]:
241
- """
242
- structured_output: 只有在 stream 為 False 時有效
243
- return_images: 是否返回圖片,但是 openrouter 不支援
244
- """
245
- # 檢查是否使用 OpenRouter
246
- is_use_openrouter = os.getenv("OPENROUTER_API_KEY") and os.getenv(
247
- "OPENROUTER_BASE_URL"
248
- )
249
- if return_images:
250
- # if os.getenv("PPLX_API_KEY", "") == "":
251
- # raise ValueError(
252
- # "PPLX_API_KEY environment variable not set, return_images needs PPLX_API_KEY"
253
- # )
254
- # Openrouter 尚不支援 return_images
255
- is_use_openrouter = False
256
-
257
- if is_use_openrouter:
258
- # 若使用 OpenRouter,轉換模型名稱並呼叫 OpenRouter 版本的函數
259
- openrouter_model = "perplexity/sonar-reasoning-pro"
260
- if model == "sonar-reasoning-pro":
261
- openrouter_model = "perplexity/sonar-reasoning-pro"
262
- elif model == "sonar-reasoning":
263
- openrouter_model = "perplexity/sonar-reasoning"
264
- elif model == "sonar-pro":
265
- openrouter_model = "perplexity/sonar-pro"
266
- elif model == "sonar":
267
- openrouter_model = "perplexity/sonar"
268
-
269
- async for event in respond_with_perplexity_search_openrouter(
270
- input_content,
271
- user_prompt_prefix,
272
- messages_for_llm,
273
- domain_filter,
274
- stream,
275
- openrouter_model,
276
- structured_output,
277
- ):
278
- yield event
279
- return
280
-
281
- # 以下是原有的邏輯
282
- if model not in ["sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar"]:
283
- model = "sonar-reasoning-pro"
284
- api_key = os.getenv("PPLX_API_KEY")
285
- if not api_key:
286
- raise ValueError("PPLX_API_KEY environment variable not set")
287
-
288
- headers = {
289
- "Authorization": f"Bearer {api_key}",
290
- "Content-Type": "application/json",
291
- }
292
- messages = deepcopy(messages_for_llm)
293
- if len(messages) > 0 and messages[-1]["role"] == "user":
294
- messages.pop()
295
- if user_prompt_prefix:
296
- xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
297
- messages.append(
298
- {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
299
- )
300
- else:
301
- messages.append({"role": "user", "content": input_content})
302
- filtered_domain_filter = []
303
-
304
- for domain in domain_filter:
305
- if domain and is_valid_domain(domain):
306
- filtered_domain_filter.append(domain)
307
-
308
- payload = {
309
- "model": model,
310
- "messages": messages,
311
- "temperature": 0.5,
312
- "stream": stream,
313
- "search_domain_filter": filtered_domain_filter,
314
- "stream_usage": True,
315
- "return_images": return_images,
316
-
317
- }
318
- try:
319
- input_token = 0
320
- output_token = 0
321
- async with aiohttp.ClientSession() as session:
322
- async with session.post(
323
- PERPLEXITY_API_URL, headers=headers, json=payload
324
- ) as response:
325
- if response.status != 200:
326
- error_text = await response.text()
327
- raise ValueError(f"Perplexity API error: {error_text}")
328
-
329
- if not stream:
330
- # 非串流模式的處理
331
- response_data = await response.json()
332
- content = response_data["choices"][0]["message"]["content"]
333
- content = remove_citation_number_from_content(content)
334
- if not structured_output:
335
- yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
336
-
337
- # 處理引用
338
- citations = response_data.get("citations", [])
339
- final_citations = [
340
- citation
341
- for citation in citations
342
- if should_include_citation(citation, domain_filter)
343
- ]
344
- images = response_data.get("images", [])
345
-
346
- if final_citations:
347
- references = f"\n\n參考來源:\n"
348
- for citation in final_citations:
349
- references += f"- {citation}\n"
350
- if not structured_output:
351
- yield PerplexitySearchEvent(chunk=references)
352
-
353
- if structured_output:
354
- yield PerplexitySearchEvent(
355
- chunk=json.dumps(
356
- {
357
- "content": content,
358
- "citations": final_citations,
359
- "images": images,
360
- }
361
- ),
362
- raw_json=response_data,
363
- )
364
-
365
- # 串流模式的處理
366
- full_response = ""
367
- final_citations = []
368
- async for line in response.content:
369
- if line:
370
- line = line.decode("utf-8").strip()
371
- if line.startswith("data: "):
372
- line = line[6:] # Remove 'data: ' prefix
373
- if line == "[DONE]":
374
- break
375
-
376
- try:
377
- chunk_data = json.loads(line)
378
- response_data = chunk_data
379
- # print(chunk_data)
380
- if (
381
- chunk_data["choices"][0]
382
- .get("delta", {})
383
- .get("content")
384
- ):
385
- content = chunk_data["choices"][0]["delta"][
386
- "content"
387
- ]
388
- full_response += content
389
- yield PerplexitySearchEvent(
390
- chunk=content,
391
- raw_json=chunk_data,
392
- )
393
- if not final_citations and chunk_data.get(
394
- "citations", []
395
- ):
396
- # 發現 perplexity 不會都有 finish_reason 為 stop 的狀況,但是 citations 會有
397
- # if chunk_data["choices"][0]["finish_reason"] == "stop":
398
- citations = chunk_data.get("citations", [])
399
- final_citations = [
400
- citation
401
- for citation in citations
402
- if should_include_citation(
403
- citation, domain_filter
404
- )
405
- ]
406
-
407
- except json.JSONDecodeError:
408
- continue
409
-
410
- # 只在有符合條件的 citations 時才產生參考文獻
411
- if final_citations:
412
- references = f"\n\n參考來源:\n"
413
- for citation in final_citations:
414
- references += f"- {citation}\n"
415
- yield PerplexitySearchEvent(chunk=references)
416
- # 安全地存取 usage 資訊,避免鍵不存在的錯誤
417
- if response_data and "usage" in response_data:
418
- usage = response_data["usage"]
419
- prompt_tokens = usage.get("prompt_tokens", 0)
420
- citation_tokens = usage.get("citation_tokens", 0)
421
- completion_tokens = usage.get("completion_tokens", 0)
422
- logging.info(
423
- f"perplexity_search============> input_token: {prompt_tokens + citation_tokens}, output_token: {completion_tokens}",
424
- )
425
- else:
426
- logging.info("perplexity_search============> usage information not available")
427
- except Exception as e:
428
- import traceback
429
-
430
- traceback.print_exc()
431
- print(e)
432
-
433
-
434
- def remove_citation_number_from_content(content: str) -> str:
435
- """
436
- 移除文字裡的 [1]、[2]、[3] 等數字
437
- """
438
- return re.sub(r"\[[0-9]+\]", "", content)
439
- # answer_message = await cl.Message(content="").send()
440
- # full_response = ""
441
- # for response in responses:
442
- # if response.candidates[0].finish_reason != Candidate.FinishReason.STOP:
443
- # # await answer_message.stream_token(response.text)
444
- # yield GeminiGroundingEvent(chunk=response.text)
445
- # full_response += response.text
446
- # if response.candidates[0].grounding_metadata:
447
- # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
448
- # references = f"\n\n{tr('Sources:')}\n"
449
- # for grounding_chunk in response.candidates[
450
- # 0
451
- # ].grounding_metadata.grounding_chunks:
452
- # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
453
- # # await answer_message.stream_token(references)
454
- # yield GeminiGroundingEvent(chunk=references)
455
- # else:
456
- # if response.candidates[0].grounding_metadata:
457
- # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
458
- # references = f"\n\n{tr('Sources:')}\n"
459
- # for grounding_chunk in response.candidates[
460
- # 0
461
- # ].grounding_metadata.grounding_chunks:
462
- # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
463
- # # await answer_message.stream_token(references)
464
- # yield GeminiGroundingEvent(chunk=references)
1
+ from copy import deepcopy
2
+ from typing import AsyncGenerator
3
+ from pydantic import BaseModel
4
+ import os
5
+ import json
6
+ import aiohttp
7
+ from dotenv import load_dotenv
8
+ import re
9
+ import logging
10
+
11
+ load_dotenv()
12
+
13
+
14
+ class PerplexitySearchEvent(BaseModel):
15
+ chunk: str
16
+ raw_json: dict | None = None
17
+
18
+
19
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
20
+ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
21
+
22
+
23
+ def should_include_citation(citation: str, domain_filter: list[str]) -> bool:
24
+ # 如果沒有任何過濾規則,接受所有網站
25
+ if not domain_filter:
26
+ return True
27
+
28
+ # 分離排除規則和包含規則
29
+ exclude_rules = [
30
+ rule[1:].replace("*.", "") for rule in domain_filter if rule.startswith("-")
31
+ ]
32
+ include_rules = [
33
+ rule.replace("*.", "") for rule in domain_filter if not rule.startswith("-")
34
+ ]
35
+
36
+ # 檢查是否符合任何排除規則
37
+ for pattern in exclude_rules:
38
+ if pattern in citation:
39
+ return False
40
+
41
+ # 如果沒有包含規則,且通過了排除規則檢查,就接受該網站
42
+ if not include_rules:
43
+ return True
44
+
45
+ # 如果有包含規則,必須符合至少一個
46
+ for pattern in include_rules:
47
+ if pattern in citation:
48
+ return True
49
+
50
+ return False
51
+
52
+
53
+ def is_valid_domain(domain: str) -> bool:
54
+ if not domain or "*." in domain:
55
+ return False
56
+
57
+ # 只允許包含 ://、.、% 和英數字的網址
58
+ # ^ 表示開頭,$ 表示結尾
59
+ # [a-zA-Z0-9] 表示英數字
60
+ # [\\.\\:\\/\\%] 表示允許的特殊字符
61
+ pattern = r"^[a-zA-Z0-9\\.\\:\\/\\%]+$"
62
+
63
+ return bool(re.match(pattern, domain))
64
+
65
+
66
+ async def respond_with_perplexity_search_openrouter(
67
+ input_content,
68
+ user_prompt_prefix,
69
+ messages_for_llm,
70
+ domain_filter: list[str],
71
+ stream: bool = False,
72
+ model: str = "perplexity/sonar-small-online",
73
+ structured_output: bool = False,
74
+ return_images: bool = False,
75
+ ) -> AsyncGenerator[PerplexitySearchEvent, None]:
76
+ """
77
+ 使用 OpenRouter 提供的 Perplexity API 服務
78
+ structured_output: 只有在 stream 為 False 時有效
79
+ """
80
+ # 確保模型是 Perplexity 的模型
81
+ if not model.startswith("perplexity/"):
82
+ model = "perplexity/sonar-small-online"
83
+
84
+ api_key = os.getenv("OPENROUTER_API_KEY")
85
+ if not api_key:
86
+ raise ValueError("OPENROUTER_API_KEY environment variable not set")
87
+
88
+ headers = {
89
+ "Authorization": f"Bearer {api_key}",
90
+ "Content-Type": "application/json",
91
+ "HTTP-Referer": "https://openrouter.ai/api/v1", # OpenRouter 需要提供來源
92
+ "X-Title": "BotRun Flow Lang", # 可選的應用名稱
93
+ }
94
+
95
+ messages = deepcopy(messages_for_llm)
96
+ if len(messages) > 0 and messages[-1]["role"] == "user":
97
+ messages.pop()
98
+ if user_prompt_prefix:
99
+ xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
100
+ messages.append(
101
+ {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
102
+ )
103
+ else:
104
+ messages.append({"role": "user", "content": input_content})
105
+
106
+ filtered_domain_filter = []
107
+ for domain in domain_filter:
108
+ if domain and is_valid_domain(domain):
109
+ filtered_domain_filter.append(domain)
110
+
111
+ payload = {
112
+ "model": model,
113
+ "messages": messages,
114
+ "temperature": 0.5,
115
+ "stream": stream,
116
+ # OpenRouter 可能不支持 search_domain_filter 參數,如果有問題可以移除
117
+ "search_domain_filter": filtered_domain_filter,
118
+ "stream_usage": True,
119
+ "return_images": return_images,
120
+ # "reasoning_effort": "high",
121
+ }
122
+
123
+ try:
124
+ input_token = 0
125
+ output_token = 0
126
+ async with aiohttp.ClientSession() as session:
127
+ async with session.post(
128
+ OPENROUTER_API_URL, headers=headers, json=payload
129
+ ) as response:
130
+ if response.status != 200:
131
+ error_text = await response.text()
132
+ raise ValueError(f"OpenRouter API error: {error_text}")
133
+
134
+ if not stream:
135
+ # 非串流模式的處理
136
+ response_data = await response.json()
137
+ content = response_data["choices"][0]["message"]["content"]
138
+ content = remove_citation_number_from_content(content)
139
+ if not structured_output:
140
+ yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
141
+
142
+ # 處理引用 (如果 OpenRouter 返回引用)
143
+ citations = response_data.get("citations", [])
144
+ final_citations = [
145
+ citation
146
+ for citation in citations
147
+ if should_include_citation(citation, domain_filter)
148
+ ]
149
+ images = response_data.get("images", [])
150
+
151
+ if final_citations:
152
+ references = f"\n\n參考來源:\n"
153
+ for citation in final_citations:
154
+ references += f"- {citation}\n"
155
+ if not structured_output:
156
+ yield PerplexitySearchEvent(chunk=references)
157
+
158
+ if structured_output:
159
+ yield PerplexitySearchEvent(
160
+ chunk=json.dumps(
161
+ {
162
+ "content": content,
163
+ "citations": final_citations,
164
+ "images": images,
165
+ }
166
+ ),
167
+ raw_json=response_data,
168
+ )
169
+ else:
170
+ # 串流模式的處理
171
+ full_response = ""
172
+ final_citations = []
173
+ async for line in response.content:
174
+ if line:
175
+ line = line.decode("utf-8").strip()
176
+ if line.startswith("data: "):
177
+ line = line[6:] # Remove 'data: ' prefix
178
+ if line == "[DONE]":
179
+ break
180
+
181
+ try:
182
+ chunk_data = json.loads(line)
183
+ response_data = chunk_data
184
+
185
+ if (
186
+ chunk_data["choices"][0]
187
+ .get("delta", {})
188
+ .get("content")
189
+ ):
190
+ content = chunk_data["choices"][0]["delta"][
191
+ "content"
192
+ ]
193
+ full_response += content
194
+ yield PerplexitySearchEvent(
195
+ chunk=content,
196
+ raw_json=chunk_data,
197
+ )
198
+ if not final_citations and chunk_data.get(
199
+ "citations", []
200
+ ):
201
+ citations = chunk_data.get("citations", [])
202
+ final_citations = [
203
+ citation
204
+ for citation in citations
205
+ if should_include_citation(
206
+ citation, domain_filter
207
+ )
208
+ ]
209
+
210
+ except json.JSONDecodeError:
211
+ continue
212
+
213
+ # 只在有符合條件的 citations 時才產生參考文獻
214
+ if final_citations:
215
+ references = f"\n\n參考來源:\n"
216
+ for citation in final_citations:
217
+ references += f"- {citation}\n"
218
+ yield PerplexitySearchEvent(chunk=references)
219
+
220
+ if response_data.get("usage"):
221
+ logging.info(
222
+ f"perplexity_search_openrouter============> input_token: {response_data['usage'].get('prompt_tokens', 0) + response_data['usage'].get('citation_tokens', 0)}, output_token: {response_data['usage'].get('completion_tokens', 0)}",
223
+ )
224
+ except Exception as e:
225
+ import traceback
226
+
227
+ traceback.print_exc()
228
+ print(e)
229
+
230
+
231
+ async def respond_with_perplexity_search(
232
+ input_content,
233
+ user_prompt_prefix,
234
+ messages_for_llm,
235
+ domain_filter: list[str],
236
+ stream: bool = False,
237
+ model: str = "sonar-reasoning-pro",
238
+ structured_output: bool = False,
239
+ return_images: bool = False,
240
+ ) -> AsyncGenerator[PerplexitySearchEvent, None]:
241
+ """
242
+ structured_output: 只有在 stream 為 False 時有效
243
+ return_images: 是否返回圖片,但是 openrouter 不支援
244
+ """
245
+ # 檢查是否使用 OpenRouter
246
+ is_use_openrouter = os.getenv("OPENROUTER_API_KEY") and os.getenv(
247
+ "OPENROUTER_BASE_URL"
248
+ )
249
+ if return_images:
250
+ # if os.getenv("PPLX_API_KEY", "") == "":
251
+ # raise ValueError(
252
+ # "PPLX_API_KEY environment variable not set, return_images needs PPLX_API_KEY"
253
+ # )
254
+ # Openrouter 尚不支援 return_images
255
+ is_use_openrouter = False
256
+
257
+ if is_use_openrouter:
258
+ # 若使用 OpenRouter,轉換模型名稱並呼叫 OpenRouter 版本的函數
259
+ openrouter_model = "perplexity/sonar-reasoning-pro"
260
+ if model == "sonar-reasoning-pro":
261
+ openrouter_model = "perplexity/sonar-reasoning-pro"
262
+ elif model == "sonar-reasoning":
263
+ openrouter_model = "perplexity/sonar-reasoning"
264
+ elif model == "sonar-pro":
265
+ openrouter_model = "perplexity/sonar-pro"
266
+ elif model == "sonar":
267
+ openrouter_model = "perplexity/sonar"
268
+
269
+ async for event in respond_with_perplexity_search_openrouter(
270
+ input_content,
271
+ user_prompt_prefix,
272
+ messages_for_llm,
273
+ domain_filter,
274
+ stream,
275
+ openrouter_model,
276
+ structured_output,
277
+ ):
278
+ yield event
279
+ return
280
+
281
+ # 以下是原有的邏輯
282
+ if model not in ["sonar-reasoning-pro", "sonar-reasoning", "sonar-pro", "sonar"]:
283
+ model = "sonar-reasoning-pro"
284
+ api_key = os.getenv("PPLX_API_KEY")
285
+ if not api_key:
286
+ raise ValueError("PPLX_API_KEY environment variable not set")
287
+
288
+ headers = {
289
+ "Authorization": f"Bearer {api_key}",
290
+ "Content-Type": "application/json",
291
+ }
292
+ messages = deepcopy(messages_for_llm)
293
+ if len(messages) > 0 and messages[-1]["role"] == "user":
294
+ messages.pop()
295
+ if user_prompt_prefix:
296
+ xml_input_content = f"<使用者提問>{input_content}</使用者提問>"
297
+ messages.append(
298
+ {"role": "user", "content": user_prompt_prefix + "\n\n" + xml_input_content}
299
+ )
300
+ else:
301
+ messages.append({"role": "user", "content": input_content})
302
+ filtered_domain_filter = []
303
+
304
+ for domain in domain_filter:
305
+ if domain and is_valid_domain(domain):
306
+ filtered_domain_filter.append(domain)
307
+
308
+ payload = {
309
+ "model": model,
310
+ "messages": messages,
311
+ "temperature": 0.5,
312
+ "stream": stream,
313
+ "search_domain_filter": filtered_domain_filter,
314
+ "stream_usage": True,
315
+ "return_images": return_images,
316
+
317
+ }
318
+ try:
319
+ input_token = 0
320
+ output_token = 0
321
+ async with aiohttp.ClientSession() as session:
322
+ async with session.post(
323
+ PERPLEXITY_API_URL, headers=headers, json=payload
324
+ ) as response:
325
+ if response.status != 200:
326
+ error_text = await response.text()
327
+ raise ValueError(f"Perplexity API error: {error_text}")
328
+
329
+ if not stream:
330
+ # 非串流模式的處理
331
+ response_data = await response.json()
332
+ content = response_data["choices"][0]["message"]["content"]
333
+ content = remove_citation_number_from_content(content)
334
+ if not structured_output:
335
+ yield PerplexitySearchEvent(chunk=content, raw_json=response_data)
336
+
337
+ # 處理引用
338
+ citations = response_data.get("citations", [])
339
+ final_citations = [
340
+ citation
341
+ for citation in citations
342
+ if should_include_citation(citation, domain_filter)
343
+ ]
344
+ images = response_data.get("images", [])
345
+
346
+ if final_citations:
347
+ references = f"\n\n參考來源:\n"
348
+ for citation in final_citations:
349
+ references += f"- {citation}\n"
350
+ if not structured_output:
351
+ yield PerplexitySearchEvent(chunk=references)
352
+
353
+ if structured_output:
354
+ yield PerplexitySearchEvent(
355
+ chunk=json.dumps(
356
+ {
357
+ "content": content,
358
+ "citations": final_citations,
359
+ "images": images,
360
+ }
361
+ ),
362
+ raw_json=response_data,
363
+ )
364
+
365
+ # 串流模式的處理
366
+ full_response = ""
367
+ final_citations = []
368
+ async for line in response.content:
369
+ if line:
370
+ line = line.decode("utf-8").strip()
371
+ if line.startswith("data: "):
372
+ line = line[6:] # Remove 'data: ' prefix
373
+ if line == "[DONE]":
374
+ break
375
+
376
+ try:
377
+ chunk_data = json.loads(line)
378
+ response_data = chunk_data
379
+ # print(chunk_data)
380
+ if (
381
+ chunk_data["choices"][0]
382
+ .get("delta", {})
383
+ .get("content")
384
+ ):
385
+ content = chunk_data["choices"][0]["delta"][
386
+ "content"
387
+ ]
388
+ full_response += content
389
+ yield PerplexitySearchEvent(
390
+ chunk=content,
391
+ raw_json=chunk_data,
392
+ )
393
+ if not final_citations and chunk_data.get(
394
+ "citations", []
395
+ ):
396
+ # 發現 perplexity 不會都有 finish_reason 為 stop 的狀況,但是 citations 會有
397
+ # if chunk_data["choices"][0]["finish_reason"] == "stop":
398
+ citations = chunk_data.get("citations", [])
399
+ final_citations = [
400
+ citation
401
+ for citation in citations
402
+ if should_include_citation(
403
+ citation, domain_filter
404
+ )
405
+ ]
406
+
407
+ except json.JSONDecodeError:
408
+ continue
409
+
410
+ # 只在有符合條件的 citations 時才產生參考文獻
411
+ if final_citations:
412
+ references = f"\n\n參考來源:\n"
413
+ for citation in final_citations:
414
+ references += f"- {citation}\n"
415
+ yield PerplexitySearchEvent(chunk=references)
416
+ # 安全地存取 usage 資訊,避免鍵不存在的錯誤
417
+ if response_data and "usage" in response_data:
418
+ usage = response_data["usage"]
419
+ prompt_tokens = usage.get("prompt_tokens", 0)
420
+ citation_tokens = usage.get("citation_tokens", 0)
421
+ completion_tokens = usage.get("completion_tokens", 0)
422
+ logging.info(
423
+ f"perplexity_search============> input_token: {prompt_tokens + citation_tokens}, output_token: {completion_tokens}",
424
+ )
425
+ else:
426
+ logging.info("perplexity_search============> usage information not available")
427
+ except Exception as e:
428
+ import traceback
429
+
430
+ traceback.print_exc()
431
+ print(e)
432
+
433
+
434
+ def remove_citation_number_from_content(content: str) -> str:
435
+ """
436
+ 移除文字裡的 [1]、[2]、[3] 等數字
437
+ """
438
+ return re.sub(r"\[[0-9]+\]", "", content)
439
+ # answer_message = await cl.Message(content="").send()
440
+ # full_response = ""
441
+ # for response in responses:
442
+ # if response.candidates[0].finish_reason != Candidate.FinishReason.STOP:
443
+ # # await answer_message.stream_token(response.text)
444
+ # yield GeminiGroundingEvent(chunk=response.text)
445
+ # full_response += response.text
446
+ # if response.candidates[0].grounding_metadata:
447
+ # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
448
+ # references = f"\n\n{tr('Sources:')}\n"
449
+ # for grounding_chunk in response.candidates[
450
+ # 0
451
+ # ].grounding_metadata.grounding_chunks:
452
+ # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
453
+ # # await answer_message.stream_token(references)
454
+ # yield GeminiGroundingEvent(chunk=references)
455
+ # else:
456
+ # if response.candidates[0].grounding_metadata:
457
+ # if len(response.candidates[0].grounding_metadata.grounding_chunks) > 0:
458
+ # references = f"\n\n{tr('Sources:')}\n"
459
+ # for grounding_chunk in response.candidates[
460
+ # 0
461
+ # ].grounding_metadata.grounding_chunks:
462
+ # references += f"- [{grounding_chunk.web.title}]({grounding_chunk.web.uri})\n"
463
+ # # await answer_message.stream_token(references)
464
+ # yield GeminiGroundingEvent(chunk=references)