botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. botrun_flow_lang/api/auth_api.py +39 -39
  2. botrun_flow_lang/api/auth_utils.py +183 -183
  3. botrun_flow_lang/api/botrun_back_api.py +65 -65
  4. botrun_flow_lang/api/flow_api.py +3 -3
  5. botrun_flow_lang/api/hatch_api.py +508 -508
  6. botrun_flow_lang/api/langgraph_api.py +811 -811
  7. botrun_flow_lang/api/line_bot_api.py +1484 -1484
  8. botrun_flow_lang/api/model_api.py +300 -300
  9. botrun_flow_lang/api/rate_limit_api.py +32 -32
  10. botrun_flow_lang/api/routes.py +79 -79
  11. botrun_flow_lang/api/search_api.py +53 -53
  12. botrun_flow_lang/api/storage_api.py +395 -395
  13. botrun_flow_lang/api/subsidy_api.py +290 -290
  14. botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
  15. botrun_flow_lang/api/user_setting_api.py +70 -70
  16. botrun_flow_lang/api/version_api.py +31 -31
  17. botrun_flow_lang/api/youtube_api.py +26 -26
  18. botrun_flow_lang/constants.py +13 -13
  19. botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
  20. botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
  21. botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
  22. botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
  23. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
  24. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
  25. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
  26. botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +723 -723
  27. botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
  28. botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
  29. botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
  30. botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
  31. botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
  32. botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
  33. botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
  34. botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
  35. botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
  36. botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +486 -486
  37. botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
  38. botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
  39. botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
  40. botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
  41. botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
  42. botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
  43. botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
  44. botrun_flow_lang/llm_agent/llm_agent.py +19 -19
  45. botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
  46. botrun_flow_lang/log/.gitignore +2 -2
  47. botrun_flow_lang/main.py +61 -61
  48. botrun_flow_lang/main_fast.py +51 -51
  49. botrun_flow_lang/mcp_server/__init__.py +10 -10
  50. botrun_flow_lang/mcp_server/default_mcp.py +744 -744
  51. botrun_flow_lang/models/nodes/utils.py +205 -205
  52. botrun_flow_lang/models/token_usage.py +34 -34
  53. botrun_flow_lang/requirements.txt +21 -21
  54. botrun_flow_lang/services/base/firestore_base.py +30 -30
  55. botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
  56. botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
  57. botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
  58. botrun_flow_lang/services/storage/storage_factory.py +12 -12
  59. botrun_flow_lang/services/storage/storage_store.py +65 -65
  60. botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
  61. botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
  62. botrun_flow_lang/static/docs/tools/index.html +926 -926
  63. botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
  64. botrun_flow_lang/tests/api_stress_test.py +357 -357
  65. botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
  66. botrun_flow_lang/tests/test_botrun_app.py +46 -46
  67. botrun_flow_lang/tests/test_html_util.py +31 -31
  68. botrun_flow_lang/tests/test_img_analyzer.py +190 -190
  69. botrun_flow_lang/tests/test_img_util.py +39 -39
  70. botrun_flow_lang/tests/test_local_files.py +114 -114
  71. botrun_flow_lang/tests/test_mermaid_util.py +103 -103
  72. botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
  73. botrun_flow_lang/tests/test_plotly_util.py +151 -151
  74. botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
  75. botrun_flow_lang/tools/generate_docs.py +133 -133
  76. botrun_flow_lang/tools/templates/tools.html +153 -153
  77. botrun_flow_lang/utils/__init__.py +7 -7
  78. botrun_flow_lang/utils/botrun_logger.py +344 -344
  79. botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
  80. botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
  81. botrun_flow_lang/utils/google_drive_utils.py +654 -654
  82. botrun_flow_lang/utils/langchain_utils.py +324 -324
  83. botrun_flow_lang/utils/yaml_utils.py +9 -9
  84. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/METADATA +1 -1
  85. botrun_flow_lang-5.12.264.dist-info/RECORD +102 -0
  86. botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
  87. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/WHEEL +0 -0
@@ -1,204 +1,204 @@
1
- """
2
- PDF 處理工具模組
3
-
4
- 提供 PDF 切割等功能,用於處理大型 PDF 檔案。
5
- 使用 pypdf(純 Python)實作,避免 C++ 庫的 segfault 問題。
6
- """
7
-
8
- import io
9
- from typing import List, Tuple
10
-
11
- from pypdf import PdfReader, PdfWriter
12
-
13
-
14
- def get_pdf_size(pdf_content: bytes) -> int:
15
- """
16
- 取得 PDF 檔案大小(bytes)
17
-
18
- Args:
19
- pdf_content: PDF 檔案的二進位內容
20
-
21
- Returns:
22
- int: 檔案大小(bytes)
23
- """
24
- return len(pdf_content)
25
-
26
-
27
- def get_pdf_size_mb(pdf_content: bytes) -> float:
28
- """
29
- 取得 PDF 檔案大小(MB)
30
-
31
- Args:
32
- pdf_content: PDF 檔案的二進位內容
33
-
34
- Returns:
35
- float: 檔案大小(MB)
36
- """
37
- return len(pdf_content) / (1024 * 1024)
38
-
39
-
40
- def get_pdf_page_count(pdf_content: bytes) -> int:
41
- """
42
- 取得 PDF 總頁數
43
-
44
- Args:
45
- pdf_content: PDF 檔案的二進位內容
46
-
47
- Returns:
48
- int: 總頁數
49
- """
50
- try:
51
- reader = PdfReader(io.BytesIO(pdf_content))
52
- return len(reader.pages)
53
- except Exception as e:
54
- print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
55
- return 0
56
-
57
-
58
- def split_pdf_by_pages(
59
- pdf_content: bytes, pages_per_chunk: int = 15
60
- ) -> List[Tuple[bytes, str]]:
61
- """
62
- 按頁數切割 PDF
63
-
64
- Args:
65
- pdf_content: PDF 檔案的二進位內容
66
- pages_per_chunk: 每個切片的頁數(預設 15 頁)
67
-
68
- Returns:
69
- List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
70
- 例如: [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
71
- """
72
- chunks = []
73
-
74
- try:
75
- reader = PdfReader(io.BytesIO(pdf_content))
76
- total_pages = len(reader.pages)
77
-
78
- for start_idx in range(0, total_pages, pages_per_chunk):
79
- end_idx = min(start_idx + pages_per_chunk, total_pages)
80
-
81
- # 建立新的 PDF 並複製頁面
82
- writer = PdfWriter()
83
- for page_idx in range(start_idx, end_idx):
84
- writer.add_page(reader.pages[page_idx])
85
-
86
- # 輸出切片
87
- output = io.BytesIO()
88
- writer.write(output)
89
- chunk_bytes = output.getvalue()
90
-
91
- # 產生頁碼範圍字串(1-indexed)
92
- page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
93
-
94
- chunks.append((chunk_bytes, page_range))
95
-
96
- except Exception as e:
97
- print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
98
- # 如果切割失敗,回傳整個 PDF 作為單一切片
99
- if pdf_content:
100
- chunks.append((pdf_content, "page-001-all"))
101
-
102
- return chunks
103
-
104
-
105
- def calculate_optimal_chunk_size(
106
- pdf_content: bytes,
107
- target_size_mb: float = 4.0,
108
- min_pages: int = 5,
109
- max_pages: int = 30,
110
- ) -> int:
111
- """
112
- 計算最佳切割頁數,確保每個切片小於目標大小
113
-
114
- 策略:
115
- 1. 先估算每頁平均大小
116
- 2. 計算達到目標大小需要的頁數
117
- 3. 限制在 min_pages 和 max_pages 之間
118
-
119
- Args:
120
- pdf_content: PDF 檔案的二進位內容
121
- target_size_mb: 目標切片大小(MB),預設 4MB
122
- min_pages: 最小頁數,預設 5 頁
123
- max_pages: 最大頁數,預設 30 頁
124
-
125
- Returns:
126
- int: 建議的每個切片頁數
127
- """
128
- total_size_mb = get_pdf_size_mb(pdf_content)
129
- total_pages = get_pdf_page_count(pdf_content)
130
-
131
- if total_pages == 0:
132
- return min_pages
133
-
134
- # 估算每頁平均大小
135
- avg_page_size_mb = total_size_mb / total_pages
136
-
137
- # 計算達到目標大小需要的頁數
138
- if avg_page_size_mb > 0:
139
- optimal_pages = int(target_size_mb / avg_page_size_mb)
140
- else:
141
- optimal_pages = max_pages
142
-
143
- # 限制在範圍內
144
- optimal_pages = max(min_pages, min(optimal_pages, max_pages))
145
-
146
- return optimal_pages
147
-
148
-
149
- def split_pdf_smart(
150
- pdf_content: bytes, target_size_mb: float = 4.0
151
- ) -> List[Tuple[bytes, str]]:
152
- """
153
- 智慧切割 PDF
154
-
155
- 先計算最佳切割頁數,然後進行切割。
156
- 如果切割後某個切片仍超過目標大小,會進一步分割。
157
-
158
- Args:
159
- pdf_content: PDF 檔案的二進位內容
160
- target_size_mb: 目標切片大小(MB),預設 4MB
161
-
162
- Returns:
163
- List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
164
- """
165
- # 計算最佳切割頁數
166
- pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
167
- print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
168
-
169
- # 進行初步切割
170
- chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
171
-
172
- # 檢查是否有切片超過目標大小,如果有則進一步分割
173
- final_chunks = []
174
- for chunk_bytes, page_range in chunks:
175
- chunk_size_mb = get_pdf_size_mb(chunk_bytes)
176
-
177
- if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
178
- # 這個切片太大,需要進一步分割
179
- print(
180
- f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
181
- f"超過目標 {target_size_mb}MB,進一步分割"
182
- )
183
-
184
- # 取得這個切片的頁碼範圍
185
- parts = page_range.replace("page-", "").split("-")
186
- start_page = int(parts[0])
187
-
188
- # 用更小的頁數重新切割
189
- smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
190
-
191
- # 更新頁碼範圍
192
- chunk_page_count = get_pdf_page_count(chunk_bytes)
193
- for i, (sub_chunk, _) in enumerate(smaller_chunks):
194
- sub_start = start_page + i * (pages_per_chunk // 2)
195
- sub_end = min(
196
- sub_start + (pages_per_chunk // 2) - 1,
197
- start_page + chunk_page_count - 1,
198
- )
199
- sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
200
- final_chunks.append((sub_chunk, sub_range))
201
- else:
202
- final_chunks.append((chunk_bytes, page_range))
203
-
204
- return final_chunks
1
+ """
2
+ PDF 處理工具模組
3
+
4
+ 提供 PDF 切割等功能,用於處理大型 PDF 檔案。
5
+ 使用 pypdf(純 Python)實作,避免 C++ 庫的 segfault 問題。
6
+ """
7
+
8
+ import io
9
+ from typing import List, Tuple
10
+
11
+ from pypdf import PdfReader, PdfWriter
12
+
13
+
14
+ def get_pdf_size(pdf_content: bytes) -> int:
15
+ """
16
+ 取得 PDF 檔案大小(bytes)
17
+
18
+ Args:
19
+ pdf_content: PDF 檔案的二進位內容
20
+
21
+ Returns:
22
+ int: 檔案大小(bytes)
23
+ """
24
+ return len(pdf_content)
25
+
26
+
27
+ def get_pdf_size_mb(pdf_content: bytes) -> float:
28
+ """
29
+ 取得 PDF 檔案大小(MB)
30
+
31
+ Args:
32
+ pdf_content: PDF 檔案的二進位內容
33
+
34
+ Returns:
35
+ float: 檔案大小(MB)
36
+ """
37
+ return len(pdf_content) / (1024 * 1024)
38
+
39
+
40
+ def get_pdf_page_count(pdf_content: bytes) -> int:
41
+ """
42
+ 取得 PDF 總頁數
43
+
44
+ Args:
45
+ pdf_content: PDF 檔案的二進位內容
46
+
47
+ Returns:
48
+ int: 總頁數
49
+ """
50
+ try:
51
+ reader = PdfReader(io.BytesIO(pdf_content))
52
+ return len(reader.pages)
53
+ except Exception as e:
54
+ print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
55
+ return 0
56
+
57
+
58
+ def split_pdf_by_pages(
59
+ pdf_content: bytes, pages_per_chunk: int = 15
60
+ ) -> List[Tuple[bytes, str]]:
61
+ """
62
+ 按頁數切割 PDF
63
+
64
+ Args:
65
+ pdf_content: PDF 檔案的二進位內容
66
+ pages_per_chunk: 每個切片的頁數(預設 15 頁)
67
+
68
+ Returns:
69
+ List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
70
+ 例如: [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
71
+ """
72
+ chunks = []
73
+
74
+ try:
75
+ reader = PdfReader(io.BytesIO(pdf_content))
76
+ total_pages = len(reader.pages)
77
+
78
+ for start_idx in range(0, total_pages, pages_per_chunk):
79
+ end_idx = min(start_idx + pages_per_chunk, total_pages)
80
+
81
+ # 建立新的 PDF 並複製頁面
82
+ writer = PdfWriter()
83
+ for page_idx in range(start_idx, end_idx):
84
+ writer.add_page(reader.pages[page_idx])
85
+
86
+ # 輸出切片
87
+ output = io.BytesIO()
88
+ writer.write(output)
89
+ chunk_bytes = output.getvalue()
90
+
91
+ # 產生頁碼範圍字串(1-indexed)
92
+ page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
93
+
94
+ chunks.append((chunk_bytes, page_range))
95
+
96
+ except Exception as e:
97
+ print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
98
+ # 如果切割失敗,回傳整個 PDF 作為單一切片
99
+ if pdf_content:
100
+ chunks.append((pdf_content, "page-001-all"))
101
+
102
+ return chunks
103
+
104
+
105
+ def calculate_optimal_chunk_size(
106
+ pdf_content: bytes,
107
+ target_size_mb: float = 4.0,
108
+ min_pages: int = 5,
109
+ max_pages: int = 30,
110
+ ) -> int:
111
+ """
112
+ 計算最佳切割頁數,確保每個切片小於目標大小
113
+
114
+ 策略:
115
+ 1. 先估算每頁平均大小
116
+ 2. 計算達到目標大小需要的頁數
117
+ 3. 限制在 min_pages 和 max_pages 之間
118
+
119
+ Args:
120
+ pdf_content: PDF 檔案的二進位內容
121
+ target_size_mb: 目標切片大小(MB),預設 4MB
122
+ min_pages: 最小頁數,預設 5 頁
123
+ max_pages: 最大頁數,預設 30 頁
124
+
125
+ Returns:
126
+ int: 建議的每個切片頁數
127
+ """
128
+ total_size_mb = get_pdf_size_mb(pdf_content)
129
+ total_pages = get_pdf_page_count(pdf_content)
130
+
131
+ if total_pages == 0:
132
+ return min_pages
133
+
134
+ # 估算每頁平均大小
135
+ avg_page_size_mb = total_size_mb / total_pages
136
+
137
+ # 計算達到目標大小需要的頁數
138
+ if avg_page_size_mb > 0:
139
+ optimal_pages = int(target_size_mb / avg_page_size_mb)
140
+ else:
141
+ optimal_pages = max_pages
142
+
143
+ # 限制在範圍內
144
+ optimal_pages = max(min_pages, min(optimal_pages, max_pages))
145
+
146
+ return optimal_pages
147
+
148
+
149
+ def split_pdf_smart(
150
+ pdf_content: bytes, target_size_mb: float = 4.0
151
+ ) -> List[Tuple[bytes, str]]:
152
+ """
153
+ 智慧切割 PDF
154
+
155
+ 先計算最佳切割頁數,然後進行切割。
156
+ 如果切割後某個切片仍超過目標大小,會進一步分割。
157
+
158
+ Args:
159
+ pdf_content: PDF 檔案的二進位內容
160
+ target_size_mb: 目標切片大小(MB),預設 4MB
161
+
162
+ Returns:
163
+ List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
164
+ """
165
+ # 計算最佳切割頁數
166
+ pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
167
+ print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
168
+
169
+ # 進行初步切割
170
+ chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
171
+
172
+ # 檢查是否有切片超過目標大小,如果有則進一步分割
173
+ final_chunks = []
174
+ for chunk_bytes, page_range in chunks:
175
+ chunk_size_mb = get_pdf_size_mb(chunk_bytes)
176
+
177
+ if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
178
+ # 這個切片太大,需要進一步分割
179
+ print(
180
+ f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
181
+ f"超過目標 {target_size_mb}MB,進一步分割"
182
+ )
183
+
184
+ # 取得這個切片的頁碼範圍
185
+ parts = page_range.replace("page-", "").split("-")
186
+ start_page = int(parts[0])
187
+
188
+ # 用更小的頁數重新切割
189
+ smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
190
+
191
+ # 更新頁碼範圍
192
+ chunk_page_count = get_pdf_page_count(chunk_bytes)
193
+ for i, (sub_chunk, _) in enumerate(smaller_chunks):
194
+ sub_start = start_page + i * (pages_per_chunk // 2)
195
+ sub_end = min(
196
+ sub_start + (pages_per_chunk // 2) - 1,
197
+ start_page + chunk_page_count - 1,
198
+ )
199
+ sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
200
+ final_chunks.append((sub_chunk, sub_range))
201
+ else:
202
+ final_chunks.append((chunk_bytes, page_range))
203
+
204
+ return final_chunks