PyPI - botrun-flow-lang - Versions diffs - 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl - Mend

botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

botrun_flow_lang/api/auth_api.py +39 -39
botrun_flow_lang/api/auth_utils.py +183 -183
botrun_flow_lang/api/botrun_back_api.py +65 -65
botrun_flow_lang/api/flow_api.py +3 -3
botrun_flow_lang/api/hatch_api.py +508 -508
botrun_flow_lang/api/langgraph_api.py +811 -811
botrun_flow_lang/api/line_bot_api.py +1484 -1484
botrun_flow_lang/api/model_api.py +300 -300
botrun_flow_lang/api/rate_limit_api.py +32 -32
botrun_flow_lang/api/routes.py +79 -79
botrun_flow_lang/api/search_api.py +53 -53
botrun_flow_lang/api/storage_api.py +395 -395
botrun_flow_lang/api/subsidy_api.py +290 -290
botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
botrun_flow_lang/api/user_setting_api.py +70 -70
botrun_flow_lang/api/version_api.py +31 -31
botrun_flow_lang/api/youtube_api.py +26 -26
botrun_flow_lang/constants.py +13 -13
botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +723 -723
botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +486 -486
botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
botrun_flow_lang/llm_agent/llm_agent.py +19 -19
botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
botrun_flow_lang/log/.gitignore +2 -2
botrun_flow_lang/main.py +61 -61
botrun_flow_lang/main_fast.py +51 -51
botrun_flow_lang/mcp_server/__init__.py +10 -10
botrun_flow_lang/mcp_server/default_mcp.py +744 -744
botrun_flow_lang/models/nodes/utils.py +205 -205
botrun_flow_lang/models/token_usage.py +34 -34
botrun_flow_lang/requirements.txt +21 -21
botrun_flow_lang/services/base/firestore_base.py +30 -30
botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
botrun_flow_lang/services/storage/storage_factory.py +12 -12
botrun_flow_lang/services/storage/storage_store.py +65 -65
botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
botrun_flow_lang/static/docs/tools/index.html +926 -926
botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
botrun_flow_lang/tests/api_stress_test.py +357 -357
botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
botrun_flow_lang/tests/test_botrun_app.py +46 -46
botrun_flow_lang/tests/test_html_util.py +31 -31
botrun_flow_lang/tests/test_img_analyzer.py +190 -190
botrun_flow_lang/tests/test_img_util.py +39 -39
botrun_flow_lang/tests/test_local_files.py +114 -114
botrun_flow_lang/tests/test_mermaid_util.py +103 -103
botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
botrun_flow_lang/tests/test_plotly_util.py +151 -151
botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
botrun_flow_lang/tools/generate_docs.py +133 -133
botrun_flow_lang/tools/templates/tools.html +153 -153
botrun_flow_lang/utils/__init__.py +7 -7
botrun_flow_lang/utils/botrun_logger.py +344 -344
botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
botrun_flow_lang/utils/google_drive_utils.py +654 -654
botrun_flow_lang/utils/langchain_utils.py +324 -324
botrun_flow_lang/utils/yaml_utils.py +9 -9
{botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/METADATA +1 -1
botrun_flow_lang-5.12.264.dist-info/RECORD +102 -0
botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
{botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/WHEEL +0 -0

botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py CHANGED Viewed

@@ -1,204 +1,204 @@
-"""
-PDF 處理工具模組
-提供 PDF 切割等功能，用於處理大型 PDF 檔案。
-使用 pypdf（純 Python）實作，避免 C++ 庫的 segfault 問題。
-"""
-import io
-from typing import List, Tuple
-from pypdf import PdfReader, PdfWriter
-def get_pdf_size(pdf_content: bytes) -> int:
-    """
-    取得 PDF 檔案大小（bytes）
-    Args:
-        pdf_content: PDF 檔案的二進位內容
-    Returns:
-        int: 檔案大小（bytes）
-    """
-    return len(pdf_content)
-def get_pdf_size_mb(pdf_content: bytes) -> float:
-    """
-    取得 PDF 檔案大小（MB）
-    Args:
-        pdf_content: PDF 檔案的二進位內容
-    Returns:
-        float: 檔案大小（MB）
-    """
-    return len(pdf_content) / (1024 * 1024)
-def get_pdf_page_count(pdf_content: bytes) -> int:
-    """
-    取得 PDF 總頁數
-    Args:
-        pdf_content: PDF 檔案的二進位內容
-    Returns:
-        int: 總頁數
-    """
-    try:
-        reader = PdfReader(io.BytesIO(pdf_content))
-        return len(reader.pages)
-    except Exception as e:
-        print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
-        return 0
-def split_pdf_by_pages(
-    pdf_content: bytes, pages_per_chunk: int = 15
-) -> List[Tuple[bytes, str]]:
-    """
-    按頁數切割 PDF
-    Args:
-        pdf_content: PDF 檔案的二進位內容
-        pages_per_chunk: 每個切片的頁數（預設 15 頁）
-    Returns:
-        List[Tuple[bytes, str]]: 切片清單，每個元素為 (切片內容, 頁碼範圍字串)
-        例如: [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
-    """
-    chunks = []
-    try:
-        reader = PdfReader(io.BytesIO(pdf_content))
-        total_pages = len(reader.pages)
-        for start_idx in range(0, total_pages, pages_per_chunk):
-            end_idx = min(start_idx + pages_per_chunk, total_pages)
-            # 建立新的 PDF 並複製頁面
-            writer = PdfWriter()
-            for page_idx in range(start_idx, end_idx):
-                writer.add_page(reader.pages[page_idx])
-            # 輸出切片
-            output = io.BytesIO()
-            writer.write(output)
-            chunk_bytes = output.getvalue()
-            # 產生頁碼範圍字串（1-indexed）
-            page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
-            chunks.append((chunk_bytes, page_range))
-    except Exception as e:
-        print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
-        # 如果切割失敗，回傳整個 PDF 作為單一切片
-        if pdf_content:
-            chunks.append((pdf_content, "page-001-all"))
-    return chunks
-def calculate_optimal_chunk_size(
-    pdf_content: bytes,
-    target_size_mb: float = 4.0,
-    min_pages: int = 5,
-    max_pages: int = 30,
-) -> int:
-    """
-    計算最佳切割頁數，確保每個切片小於目標大小
-    策略：
-    1. 先估算每頁平均大小
-    2. 計算達到目標大小需要的頁數
-    3. 限制在 min_pages 和 max_pages 之間
-    Args:
-        pdf_content: PDF 檔案的二進位內容
-        target_size_mb: 目標切片大小（MB），預設 4MB
-        min_pages: 最小頁數，預設 5 頁
-        max_pages: 最大頁數，預設 30 頁
-    Returns:
-        int: 建議的每個切片頁數
-    """
-    total_size_mb = get_pdf_size_mb(pdf_content)
-    total_pages = get_pdf_page_count(pdf_content)
-    if total_pages == 0:
-        return min_pages
-    # 估算每頁平均大小
-    avg_page_size_mb = total_size_mb / total_pages
-    # 計算達到目標大小需要的頁數
-    if avg_page_size_mb > 0:
-        optimal_pages = int(target_size_mb / avg_page_size_mb)
-    else:
-        optimal_pages = max_pages
-    # 限制在範圍內
-    optimal_pages = max(min_pages, min(optimal_pages, max_pages))
-    return optimal_pages
-def split_pdf_smart(
-    pdf_content: bytes, target_size_mb: float = 4.0
-) -> List[Tuple[bytes, str]]:
-    """
-    智慧切割 PDF
-    先計算最佳切割頁數，然後進行切割。
-    如果切割後某個切片仍超過目標大小，會進一步分割。
-    Args:
-        pdf_content: PDF 檔案的二進位內容
-        target_size_mb: 目標切片大小（MB），預設 4MB
-    Returns:
-        List[Tuple[bytes, str]]: 切片清單，每個元素為 (切片內容, 頁碼範圍字串)
-    """
-    # 計算最佳切割頁數
-    pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
-    print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
-    # 進行初步切割
-    chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
-    # 檢查是否有切片超過目標大小，如果有則進一步分割
-    final_chunks = []
-    for chunk_bytes, page_range in chunks:
-        chunk_size_mb = get_pdf_size_mb(chunk_bytes)
-        if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
-            # 這個切片太大，需要進一步分割
-            print(
-                f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
-                f"超過目標 {target_size_mb}MB，進一步分割"
-            )
-            # 取得這個切片的頁碼範圍
-            parts = page_range.replace("page-", "").split("-")
-            start_page = int(parts[0])
-            # 用更小的頁數重新切割
-            smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
-            # 更新頁碼範圍
-            chunk_page_count = get_pdf_page_count(chunk_bytes)
-            for i, (sub_chunk, _) in enumerate(smaller_chunks):
-                sub_start = start_page + i * (pages_per_chunk // 2)
-                sub_end = min(
-                    sub_start + (pages_per_chunk // 2) - 1,
-                    start_page + chunk_page_count - 1,
-                )
-                sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
-                final_chunks.append((sub_chunk, sub_range))
-        else:
-            final_chunks.append((chunk_bytes, page_range))
-    return final_chunks
+"""
+PDF 處理工具模組
+提供 PDF 切割等功能，用於處理大型 PDF 檔案。
+使用 pypdf（純 Python）實作，避免 C++ 庫的 segfault 問題。
+"""
+import io
+from typing import List, Tuple
+from pypdf import PdfReader, PdfWriter
+def get_pdf_size(pdf_content: bytes) -> int:
+    """
+    取得 PDF 檔案大小（bytes）
+    Args:
+        pdf_content: PDF 檔案的二進位內容
+    Returns:
+        int: 檔案大小（bytes）
+    """
+    return len(pdf_content)
+def get_pdf_size_mb(pdf_content: bytes) -> float:
+    """
+    取得 PDF 檔案大小（MB）
+    Args:
+        pdf_content: PDF 檔案的二進位內容
+    Returns:
+        float: 檔案大小（MB）
+    """
+    return len(pdf_content) / (1024 * 1024)
+def get_pdf_page_count(pdf_content: bytes) -> int:
+    """
+    取得 PDF 總頁數
+    Args:
+        pdf_content: PDF 檔案的二進位內容
+    Returns:
+        int: 總頁數
+    """
+    try:
+        reader = PdfReader(io.BytesIO(pdf_content))
+        return len(reader.pages)
+    except Exception as e:
+        print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
+        return 0
+def split_pdf_by_pages(
+    pdf_content: bytes, pages_per_chunk: int = 15
+) -> List[Tuple[bytes, str]]:
+    """
+    按頁數切割 PDF
+    Args:
+        pdf_content: PDF 檔案的二進位內容
+        pages_per_chunk: 每個切片的頁數（預設 15 頁）
+    Returns:
+        List[Tuple[bytes, str]]: 切片清單，每個元素為 (切片內容, 頁碼範圍字串)
+        例如: [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
+    """
+    chunks = []
+    try:
+        reader = PdfReader(io.BytesIO(pdf_content))
+        total_pages = len(reader.pages)
+        for start_idx in range(0, total_pages, pages_per_chunk):
+            end_idx = min(start_idx + pages_per_chunk, total_pages)
+            # 建立新的 PDF 並複製頁面
+            writer = PdfWriter()
+            for page_idx in range(start_idx, end_idx):
+                writer.add_page(reader.pages[page_idx])
+            # 輸出切片
+            output = io.BytesIO()
+            writer.write(output)
+            chunk_bytes = output.getvalue()
+            # 產生頁碼範圍字串（1-indexed）
+            page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
+            chunks.append((chunk_bytes, page_range))
+    except Exception as e:
+        print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
+        # 如果切割失敗，回傳整個 PDF 作為單一切片
+        if pdf_content:
+            chunks.append((pdf_content, "page-001-all"))
+    return chunks
+def calculate_optimal_chunk_size(
+    pdf_content: bytes,
+    target_size_mb: float = 4.0,
+    min_pages: int = 5,
+    max_pages: int = 30,
+) -> int:
+    """
+    計算最佳切割頁數，確保每個切片小於目標大小
+    策略：
+    1. 先估算每頁平均大小
+    2. 計算達到目標大小需要的頁數
+    3. 限制在 min_pages 和 max_pages 之間
+    Args:
+        pdf_content: PDF 檔案的二進位內容
+        target_size_mb: 目標切片大小（MB），預設 4MB
+        min_pages: 最小頁數，預設 5 頁
+        max_pages: 最大頁數，預設 30 頁
+    Returns:
+        int: 建議的每個切片頁數
+    """
+    total_size_mb = get_pdf_size_mb(pdf_content)
+    total_pages = get_pdf_page_count(pdf_content)
+    if total_pages == 0:
+        return min_pages
+    # 估算每頁平均大小
+    avg_page_size_mb = total_size_mb / total_pages
+    # 計算達到目標大小需要的頁數
+    if avg_page_size_mb > 0:
+        optimal_pages = int(target_size_mb / avg_page_size_mb)
+    else:
+        optimal_pages = max_pages
+    # 限制在範圍內
+    optimal_pages = max(min_pages, min(optimal_pages, max_pages))
+    return optimal_pages
+def split_pdf_smart(
+    pdf_content: bytes, target_size_mb: float = 4.0
+) -> List[Tuple[bytes, str]]:
+    """
+    智慧切割 PDF
+    先計算最佳切割頁數，然後進行切割。
+    如果切割後某個切片仍超過目標大小，會進一步分割。
+    Args:
+        pdf_content: PDF 檔案的二進位內容
+        target_size_mb: 目標切片大小（MB），預設 4MB
+    Returns:
+        List[Tuple[bytes, str]]: 切片清單，每個元素為 (切片內容, 頁碼範圍字串)
+    """
+    # 計算最佳切割頁數
+    pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
+    print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
+    # 進行初步切割
+    chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
+    # 檢查是否有切片超過目標大小，如果有則進一步分割
+    final_chunks = []
+    for chunk_bytes, page_range in chunks:
+        chunk_size_mb = get_pdf_size_mb(chunk_bytes)
+        if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
+            # 這個切片太大，需要進一步分割
+            print(
+                f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
+                f"超過目標 {target_size_mb}MB，進一步分割"
+            )
+            # 取得這個切片的頁碼範圍
+            parts = page_range.replace("page-", "").split("-")
+            start_page = int(parts[0])
+            # 用更小的頁數重新切割
+            smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
+            # 更新頁碼範圍
+            chunk_page_count = get_pdf_page_count(chunk_bytes)
+            for i, (sub_chunk, _) in enumerate(smaller_chunks):
+                sub_start = start_page + i * (pages_per_chunk // 2)
+                sub_end = min(
+                    sub_start + (pages_per_chunk // 2) - 1,
+                    start_page + chunk_page_count - 1,
+                )
+                sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
+                final_chunks.append((sub_chunk, sub_range))
+        else:
+            final_chunks.append((chunk_bytes, page_range))
+    return final_chunks

botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl

botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl