botrun-flow-lang 5.12.263__py3-none-any.whl → 6.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. botrun_flow_lang/api/auth_api.py +39 -39
  2. botrun_flow_lang/api/auth_utils.py +183 -183
  3. botrun_flow_lang/api/botrun_back_api.py +65 -65
  4. botrun_flow_lang/api/flow_api.py +3 -3
  5. botrun_flow_lang/api/hatch_api.py +508 -508
  6. botrun_flow_lang/api/langgraph_api.py +816 -811
  7. botrun_flow_lang/api/langgraph_constants.py +11 -0
  8. botrun_flow_lang/api/line_bot_api.py +1484 -1484
  9. botrun_flow_lang/api/model_api.py +300 -300
  10. botrun_flow_lang/api/rate_limit_api.py +32 -32
  11. botrun_flow_lang/api/routes.py +79 -79
  12. botrun_flow_lang/api/search_api.py +53 -53
  13. botrun_flow_lang/api/storage_api.py +395 -395
  14. botrun_flow_lang/api/subsidy_api.py +290 -290
  15. botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
  16. botrun_flow_lang/api/user_setting_api.py +70 -70
  17. botrun_flow_lang/api/version_api.py +31 -31
  18. botrun_flow_lang/api/youtube_api.py +26 -26
  19. botrun_flow_lang/constants.py +13 -13
  20. botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
  21. botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
  22. botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
  23. botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
  24. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
  25. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
  26. botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
  27. botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +730 -723
  28. botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
  29. botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
  30. botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
  31. botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
  32. botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
  33. botrun_flow_lang/langgraph_agents/agents/util/img_util.py +336 -294
  34. botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
  35. botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
  36. botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
  37. botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +562 -486
  38. botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
  39. botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
  40. botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
  41. botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
  42. botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
  43. botrun_flow_lang/langgraph_agents/agents/util/usage_metadata.py +34 -0
  44. botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
  45. botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
  46. botrun_flow_lang/llm_agent/llm_agent.py +19 -19
  47. botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
  48. botrun_flow_lang/log/.gitignore +2 -2
  49. botrun_flow_lang/main.py +61 -61
  50. botrun_flow_lang/main_fast.py +51 -51
  51. botrun_flow_lang/mcp_server/__init__.py +10 -10
  52. botrun_flow_lang/mcp_server/default_mcp.py +854 -744
  53. botrun_flow_lang/models/nodes/utils.py +205 -205
  54. botrun_flow_lang/models/token_usage.py +34 -34
  55. botrun_flow_lang/requirements.txt +21 -21
  56. botrun_flow_lang/services/base/firestore_base.py +30 -30
  57. botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
  58. botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
  59. botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
  60. botrun_flow_lang/services/storage/storage_factory.py +12 -12
  61. botrun_flow_lang/services/storage/storage_store.py +65 -65
  62. botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
  63. botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
  64. botrun_flow_lang/static/docs/tools/index.html +926 -926
  65. botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
  66. botrun_flow_lang/tests/api_stress_test.py +357 -357
  67. botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
  68. botrun_flow_lang/tests/test_botrun_app.py +46 -46
  69. botrun_flow_lang/tests/test_html_util.py +31 -31
  70. botrun_flow_lang/tests/test_img_analyzer.py +190 -190
  71. botrun_flow_lang/tests/test_img_util.py +39 -39
  72. botrun_flow_lang/tests/test_local_files.py +114 -114
  73. botrun_flow_lang/tests/test_mermaid_util.py +103 -103
  74. botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
  75. botrun_flow_lang/tests/test_plotly_util.py +151 -151
  76. botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
  77. botrun_flow_lang/tools/generate_docs.py +133 -133
  78. botrun_flow_lang/tools/templates/tools.html +153 -153
  79. botrun_flow_lang/utils/__init__.py +7 -7
  80. botrun_flow_lang/utils/botrun_logger.py +344 -344
  81. botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
  82. botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
  83. botrun_flow_lang/utils/google_drive_utils.py +654 -654
  84. botrun_flow_lang/utils/langchain_utils.py +324 -324
  85. botrun_flow_lang/utils/yaml_utils.py +9 -9
  86. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-6.2.21.dist-info}/METADATA +6 -6
  87. botrun_flow_lang-6.2.21.dist-info/RECORD +104 -0
  88. botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
  89. {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-6.2.21.dist-info}/WHEEL +0 -0
@@ -1,204 +1,204 @@
1
- """
2
- PDF 處理工具模組
3
-
4
- 提供 PDF 切割等功能,用於處理大型 PDF 檔案。
5
- 使用 pypdf(純 Python)實作,避免 C++ 庫的 segfault 問題。
6
- """
7
-
8
- import io
9
- from typing import List, Tuple
10
-
11
- from pypdf import PdfReader, PdfWriter
12
-
13
-
14
- def get_pdf_size(pdf_content: bytes) -> int:
15
- """
16
- 取得 PDF 檔案大小(bytes)
17
-
18
- Args:
19
- pdf_content: PDF 檔案的二進位內容
20
-
21
- Returns:
22
- int: 檔案大小(bytes)
23
- """
24
- return len(pdf_content)
25
-
26
-
27
- def get_pdf_size_mb(pdf_content: bytes) -> float:
28
- """
29
- 取得 PDF 檔案大小(MB)
30
-
31
- Args:
32
- pdf_content: PDF 檔案的二進位內容
33
-
34
- Returns:
35
- float: 檔案大小(MB)
36
- """
37
- return len(pdf_content) / (1024 * 1024)
38
-
39
-
40
- def get_pdf_page_count(pdf_content: bytes) -> int:
41
- """
42
- 取得 PDF 總頁數
43
-
44
- Args:
45
- pdf_content: PDF 檔案的二進位內容
46
-
47
- Returns:
48
- int: 總頁數
49
- """
50
- try:
51
- reader = PdfReader(io.BytesIO(pdf_content))
52
- return len(reader.pages)
53
- except Exception as e:
54
- print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
55
- return 0
56
-
57
-
58
- def split_pdf_by_pages(
59
- pdf_content: bytes, pages_per_chunk: int = 15
60
- ) -> List[Tuple[bytes, str]]:
61
- """
62
- 按頁數切割 PDF
63
-
64
- Args:
65
- pdf_content: PDF 檔案的二進位內容
66
- pages_per_chunk: 每個切片的頁數(預設 15 頁)
67
-
68
- Returns:
69
- List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
70
- 例如: [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
71
- """
72
- chunks = []
73
-
74
- try:
75
- reader = PdfReader(io.BytesIO(pdf_content))
76
- total_pages = len(reader.pages)
77
-
78
- for start_idx in range(0, total_pages, pages_per_chunk):
79
- end_idx = min(start_idx + pages_per_chunk, total_pages)
80
-
81
- # 建立新的 PDF 並複製頁面
82
- writer = PdfWriter()
83
- for page_idx in range(start_idx, end_idx):
84
- writer.add_page(reader.pages[page_idx])
85
-
86
- # 輸出切片
87
- output = io.BytesIO()
88
- writer.write(output)
89
- chunk_bytes = output.getvalue()
90
-
91
- # 產生頁碼範圍字串(1-indexed)
92
- page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
93
-
94
- chunks.append((chunk_bytes, page_range))
95
-
96
- except Exception as e:
97
- print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
98
- # 如果切割失敗,回傳整個 PDF 作為單一切片
99
- if pdf_content:
100
- chunks.append((pdf_content, "page-001-all"))
101
-
102
- return chunks
103
-
104
-
105
- def calculate_optimal_chunk_size(
106
- pdf_content: bytes,
107
- target_size_mb: float = 4.0,
108
- min_pages: int = 5,
109
- max_pages: int = 30,
110
- ) -> int:
111
- """
112
- 計算最佳切割頁數,確保每個切片小於目標大小
113
-
114
- 策略:
115
- 1. 先估算每頁平均大小
116
- 2. 計算達到目標大小需要的頁數
117
- 3. 限制在 min_pages 和 max_pages 之間
118
-
119
- Args:
120
- pdf_content: PDF 檔案的二進位內容
121
- target_size_mb: 目標切片大小(MB),預設 4MB
122
- min_pages: 最小頁數,預設 5 頁
123
- max_pages: 最大頁數,預設 30 頁
124
-
125
- Returns:
126
- int: 建議的每個切片頁數
127
- """
128
- total_size_mb = get_pdf_size_mb(pdf_content)
129
- total_pages = get_pdf_page_count(pdf_content)
130
-
131
- if total_pages == 0:
132
- return min_pages
133
-
134
- # 估算每頁平均大小
135
- avg_page_size_mb = total_size_mb / total_pages
136
-
137
- # 計算達到目標大小需要的頁數
138
- if avg_page_size_mb > 0:
139
- optimal_pages = int(target_size_mb / avg_page_size_mb)
140
- else:
141
- optimal_pages = max_pages
142
-
143
- # 限制在範圍內
144
- optimal_pages = max(min_pages, min(optimal_pages, max_pages))
145
-
146
- return optimal_pages
147
-
148
-
149
- def split_pdf_smart(
150
- pdf_content: bytes, target_size_mb: float = 4.0
151
- ) -> List[Tuple[bytes, str]]:
152
- """
153
- 智慧切割 PDF
154
-
155
- 先計算最佳切割頁數,然後進行切割。
156
- 如果切割後某個切片仍超過目標大小,會進一步分割。
157
-
158
- Args:
159
- pdf_content: PDF 檔案的二進位內容
160
- target_size_mb: 目標切片大小(MB),預設 4MB
161
-
162
- Returns:
163
- List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
164
- """
165
- # 計算最佳切割頁數
166
- pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
167
- print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
168
-
169
- # 進行初步切割
170
- chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
171
-
172
- # 檢查是否有切片超過目標大小,如果有則進一步分割
173
- final_chunks = []
174
- for chunk_bytes, page_range in chunks:
175
- chunk_size_mb = get_pdf_size_mb(chunk_bytes)
176
-
177
- if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
178
- # 這個切片太大,需要進一步分割
179
- print(
180
- f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
181
- f"超過目標 {target_size_mb}MB,進一步分割"
182
- )
183
-
184
- # 取得這個切片的頁碼範圍
185
- parts = page_range.replace("page-", "").split("-")
186
- start_page = int(parts[0])
187
-
188
- # 用更小的頁數重新切割
189
- smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
190
-
191
- # 更新頁碼範圍
192
- chunk_page_count = get_pdf_page_count(chunk_bytes)
193
- for i, (sub_chunk, _) in enumerate(smaller_chunks):
194
- sub_start = start_page + i * (pages_per_chunk // 2)
195
- sub_end = min(
196
- sub_start + (pages_per_chunk // 2) - 1,
197
- start_page + chunk_page_count - 1,
198
- )
199
- sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
200
- final_chunks.append((sub_chunk, sub_range))
201
- else:
202
- final_chunks.append((chunk_bytes, page_range))
203
-
204
- return final_chunks
1
+ """
2
+ PDF 處理工具模組
3
+
4
+ 提供 PDF 切割等功能,用於處理大型 PDF 檔案。
5
+ 使用 pypdf(純 Python)實作,避免 C++ 庫的 segfault 問題。
6
+ """
7
+
8
+ import io
9
+ from typing import List, Tuple
10
+
11
+ from pypdf import PdfReader, PdfWriter
12
+
13
+
14
+ def get_pdf_size(pdf_content: bytes) -> int:
15
+ """
16
+ 取得 PDF 檔案大小(bytes)
17
+
18
+ Args:
19
+ pdf_content: PDF 檔案的二進位內容
20
+
21
+ Returns:
22
+ int: 檔案大小(bytes)
23
+ """
24
+ return len(pdf_content)
25
+
26
+
27
+ def get_pdf_size_mb(pdf_content: bytes) -> float:
28
+ """
29
+ 取得 PDF 檔案大小(MB)
30
+
31
+ Args:
32
+ pdf_content: PDF 檔案的二進位內容
33
+
34
+ Returns:
35
+ float: 檔案大小(MB)
36
+ """
37
+ return len(pdf_content) / (1024 * 1024)
38
+
39
+
40
+ def get_pdf_page_count(pdf_content: bytes) -> int:
41
+ """
42
+ 取得 PDF 總頁數
43
+
44
+ Args:
45
+ pdf_content: PDF 檔案的二進位內容
46
+
47
+ Returns:
48
+ int: 總頁數
49
+ """
50
+ try:
51
+ reader = PdfReader(io.BytesIO(pdf_content))
52
+ return len(reader.pages)
53
+ except Exception as e:
54
+ print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
55
+ return 0
56
+
57
+
58
+ def split_pdf_by_pages(
59
+ pdf_content: bytes, pages_per_chunk: int = 15
60
+ ) -> List[Tuple[bytes, str]]:
61
+ """
62
+ 按頁數切割 PDF
63
+
64
+ Args:
65
+ pdf_content: PDF 檔案的二進位內容
66
+ pages_per_chunk: 每個切片的頁數(預設 15 頁)
67
+
68
+ Returns:
69
+ List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
70
+ 例如: [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
71
+ """
72
+ chunks = []
73
+
74
+ try:
75
+ reader = PdfReader(io.BytesIO(pdf_content))
76
+ total_pages = len(reader.pages)
77
+
78
+ for start_idx in range(0, total_pages, pages_per_chunk):
79
+ end_idx = min(start_idx + pages_per_chunk, total_pages)
80
+
81
+ # 建立新的 PDF 並複製頁面
82
+ writer = PdfWriter()
83
+ for page_idx in range(start_idx, end_idx):
84
+ writer.add_page(reader.pages[page_idx])
85
+
86
+ # 輸出切片
87
+ output = io.BytesIO()
88
+ writer.write(output)
89
+ chunk_bytes = output.getvalue()
90
+
91
+ # 產生頁碼範圍字串(1-indexed)
92
+ page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
93
+
94
+ chunks.append((chunk_bytes, page_range))
95
+
96
+ except Exception as e:
97
+ print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
98
+ # 如果切割失敗,回傳整個 PDF 作為單一切片
99
+ if pdf_content:
100
+ chunks.append((pdf_content, "page-001-all"))
101
+
102
+ return chunks
103
+
104
+
105
+ def calculate_optimal_chunk_size(
106
+ pdf_content: bytes,
107
+ target_size_mb: float = 4.0,
108
+ min_pages: int = 5,
109
+ max_pages: int = 30,
110
+ ) -> int:
111
+ """
112
+ 計算最佳切割頁數,確保每個切片小於目標大小
113
+
114
+ 策略:
115
+ 1. 先估算每頁平均大小
116
+ 2. 計算達到目標大小需要的頁數
117
+ 3. 限制在 min_pages 和 max_pages 之間
118
+
119
+ Args:
120
+ pdf_content: PDF 檔案的二進位內容
121
+ target_size_mb: 目標切片大小(MB),預設 4MB
122
+ min_pages: 最小頁數,預設 5 頁
123
+ max_pages: 最大頁數,預設 30 頁
124
+
125
+ Returns:
126
+ int: 建議的每個切片頁數
127
+ """
128
+ total_size_mb = get_pdf_size_mb(pdf_content)
129
+ total_pages = get_pdf_page_count(pdf_content)
130
+
131
+ if total_pages == 0:
132
+ return min_pages
133
+
134
+ # 估算每頁平均大小
135
+ avg_page_size_mb = total_size_mb / total_pages
136
+
137
+ # 計算達到目標大小需要的頁數
138
+ if avg_page_size_mb > 0:
139
+ optimal_pages = int(target_size_mb / avg_page_size_mb)
140
+ else:
141
+ optimal_pages = max_pages
142
+
143
+ # 限制在範圍內
144
+ optimal_pages = max(min_pages, min(optimal_pages, max_pages))
145
+
146
+ return optimal_pages
147
+
148
+
149
+ def split_pdf_smart(
150
+ pdf_content: bytes, target_size_mb: float = 4.0
151
+ ) -> List[Tuple[bytes, str]]:
152
+ """
153
+ 智慧切割 PDF
154
+
155
+ 先計算最佳切割頁數,然後進行切割。
156
+ 如果切割後某個切片仍超過目標大小,會進一步分割。
157
+
158
+ Args:
159
+ pdf_content: PDF 檔案的二進位內容
160
+ target_size_mb: 目標切片大小(MB),預設 4MB
161
+
162
+ Returns:
163
+ List[Tuple[bytes, str]]: 切片清單,每個元素為 (切片內容, 頁碼範圍字串)
164
+ """
165
+ # 計算最佳切割頁數
166
+ pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
167
+ print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
168
+
169
+ # 進行初步切割
170
+ chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
171
+
172
+ # 檢查是否有切片超過目標大小,如果有則進一步分割
173
+ final_chunks = []
174
+ for chunk_bytes, page_range in chunks:
175
+ chunk_size_mb = get_pdf_size_mb(chunk_bytes)
176
+
177
+ if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
178
+ # 這個切片太大,需要進一步分割
179
+ print(
180
+ f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
181
+ f"超過目標 {target_size_mb}MB,進一步分割"
182
+ )
183
+
184
+ # 取得這個切片的頁碼範圍
185
+ parts = page_range.replace("page-", "").split("-")
186
+ start_page = int(parts[0])
187
+
188
+ # 用更小的頁數重新切割
189
+ smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
190
+
191
+ # 更新頁碼範圍
192
+ chunk_page_count = get_pdf_page_count(chunk_bytes)
193
+ for i, (sub_chunk, _) in enumerate(smaller_chunks):
194
+ sub_start = start_page + i * (pages_per_chunk // 2)
195
+ sub_end = min(
196
+ sub_start + (pages_per_chunk // 2) - 1,
197
+ start_page + chunk_page_count - 1,
198
+ )
199
+ sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
200
+ final_chunks.append((sub_chunk, sub_range))
201
+ else:
202
+ final_chunks.append((chunk_bytes, page_range))
203
+
204
+ return final_chunks