botrun-flow-lang 5.11.281__py3-none-any.whl → 5.12.261__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -286,13 +286,40 @@ async def get_hatches(
286
286
  user_id: str,
287
287
  offset: int = Query(0, ge=0),
288
288
  limit: int = Query(20, ge=1, le=100),
289
+ sort_by: str = Query("updated_at", description="Field to sort by (name, updated_at)"),
290
+ order: str = Query("desc", regex="^(asc|desc)$", description="Sort order: asc or desc"),
289
291
  current_user: CurrentUser = Depends(verify_jwt_token),
290
292
  hatch_store=Depends(get_hatch_store),
291
293
  ):
294
+ """Get hatches for a user with sorting options.
295
+
296
+ Args:
297
+ user_id: User ID to get hatches for
298
+ offset: Pagination offset
299
+ limit: Maximum number of results (1-100)
300
+ sort_by: Field to sort by - only 'name' or 'updated_at' are supported (default: updated_at)
301
+ order: Sort order - 'asc' or 'desc' (default: desc for newest first)
302
+
303
+ Returns:
304
+ List of hatches sorted by the specified field
305
+
306
+ Raises:
307
+ HTTPException: 400 if sort_by field is not supported
308
+ """
292
309
  # Verify user permission to access hatches for the specified user_id
293
310
  verify_user_permission(current_user, user_id)
294
311
 
295
- hatches, error = await hatch_store.get_hatches(user_id, offset, limit)
312
+ # Validate sort_by field - only allow fields with Firestore indexes
313
+ allowed_sort_fields = ["name", "updated_at"]
314
+ if sort_by not in allowed_sort_fields:
315
+ raise HTTPException(
316
+ status_code=400,
317
+ detail=f"Invalid sort_by field '{sort_by}'. Allowed fields: {', '.join(allowed_sort_fields)}",
318
+ )
319
+
320
+ hatches, error = await hatch_store.get_hatches(
321
+ user_id, offset, limit, sort_by, order
322
+ )
296
323
  if error:
297
324
  raise HTTPException(status_code=500, detail=error)
298
325
  return hatches
@@ -722,10 +722,11 @@ async def subsidy_webhook(request: Request):
722
722
  )
723
723
  responses.append(response)
724
724
 
725
+ # NOTE: 按讚反讚功能已暫時停用(2025-12-03),日後需要可以取消註解以下程式碼
725
726
  # 處理使用者藉由按讚反讚按鈕反饋的postback事件
726
- elif isinstance(event, PostbackEvent):
727
- await handle_feedback(event, line_bot_api)
728
- responses.append("feedback_handled")
727
+ # elif isinstance(event, PostbackEvent):
728
+ # await handle_feedback(event, line_bot_api)
729
+ # responses.append("feedback_handled")
729
730
 
730
731
  return {"responses": responses}
731
732
 
@@ -963,34 +964,40 @@ async def handle_message(
963
964
  )
964
965
  )
965
966
 
967
+ # NOTE: 按讚反讚功能已暫時停用(2025-12-03),日後需要可以取消註解以下程式碼
966
968
  # 以 Quick Reply 作為按讚反讚按鈕
967
- quick_reply = QuickReply(
968
- items=[
969
- QuickReplyItem(
970
- action=PostbackAction(
971
- label="津好康,真是棒👍🏻",
972
- data="實用",
973
- display_text="津好康,真是棒👍🏻",
974
- )
975
- ),
976
- QuickReplyItem(
977
- action=PostbackAction(
978
- label="津可惜,不太實用😖",
979
- data="不實用",
980
- display_text="津可惜,不太實用😖",
981
- )
982
- ),
983
- ]
984
- )
969
+ # quick_reply = QuickReply(
970
+ # items=[
971
+ # QuickReplyItem(
972
+ # action=PostbackAction(
973
+ # label="津好康,真是棒👍🏻",
974
+ # data="實用",
975
+ # display_text="津好康,真是棒👍🏻",
976
+ # )
977
+ # ),
978
+ # QuickReplyItem(
979
+ # action=PostbackAction(
980
+ # label="津可惜,不太實用😖",
981
+ # data="不實用",
982
+ # display_text="津可惜,不太實用😖",
983
+ # )
984
+ # ),
985
+ # ]
986
+ # )
985
987
 
986
988
  if question_bubble:
987
989
  messages.append(FlexMessage(alt_text="相關問題", contents=question_bubble))
988
990
 
989
- messages[-1].quick_reply = quick_reply
990
-
991
+ # messages[-1].quick_reply = quick_reply
992
+ logging.info(
993
+ f"[Line Bot Webhook: handle_message] start reply_message"
994
+ )
991
995
  await line_bot_api.reply_message(
992
996
  ReplyMessageRequest(reply_token=event.reply_token, messages=messages)
993
997
  )
998
+ logging.info(
999
+ f"[Line Bot Webhook: handle_message] end reply_message"
1000
+ )
994
1001
  except Exception as e:
995
1002
  traceback.print_exc()
996
1003
  logging.error(
@@ -1085,14 +1092,15 @@ async def get_reply_text(
1085
1092
 
1086
1093
  try:
1087
1094
  # 取得系統指令
1088
- system_instruction = get_subsidy_api_system_prompt()
1095
+ # 暫時不需要,因為現在是直接呼叫 cbh 的 api, prompt 在它裡面
1096
+ # system_instruction = get_subsidy_api_system_prompt()
1089
1097
 
1090
1098
  # 調用外部 API
1091
1099
  api_response = await call_subsidy_api(
1092
1100
  user_message=line_user_message,
1093
1101
  user_id=user_id,
1094
1102
  display_name=display_name,
1095
- system_instruction=system_instruction
1103
+ # system_instruction=system_instruction
1096
1104
  )
1097
1105
 
1098
1106
  # 提取 token 使用量資訊
@@ -71,15 +71,19 @@ async def langgraph_runner(
71
71
  # 設定新的 recursion_limit 為 (multiplier + 1) * MAX_RECURSION_LIMIT
72
72
  config["recursion_limit"] = (multiplier + 1) * MAX_RECURSION_LIMIT
73
73
 
74
- async for event in graph.astream_events(
75
- invoke_state,
76
- config,
77
- version="v2",
78
- ):
79
- # state = await graph.aget_state(config)
80
- # print(state.config)
81
-
82
- yield event
74
+ try:
75
+ async for event in graph.astream_events(
76
+ invoke_state,
77
+ config,
78
+ version="v2",
79
+ ):
80
+ yield event
81
+ except Exception as e:
82
+ # 捕獲 SSE 流讀取錯誤(如 httpcore.ReadError)
83
+ import logging
84
+ logging.error(f"Error reading SSE stream: {e}", exc_info=True)
85
+ # 產生錯誤 event 讓調用者知道
86
+ yield {"error": f"SSE stream error: {str(e)}"}
83
87
 
84
88
 
85
89
  # graph 是 CompiledStateGraph,不傳入型別的原因是,loading import 需要 0.5秒
@@ -1,14 +1,32 @@
1
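+ """
2
+ PDF analysis module.
3
+
4
+ Analyzes PDF files using one of two strategies, chosen by file size:
5
+ - Small files (< PDF_SIZE_THRESHOLD_MB): direct multimodal Q&A on the whole file
6
+ - Large files (>= PDF_SIZE_THRESHOLD_MB): split -> parallel multimodal Q&A per chunk -> LLM merge of the results
7
+ """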
8
+
1
9
  import anthropic
10
+ import asyncio
2
11
  import base64
3
12
  import httpx
4
-
5
-
6
13
  import os
14
+ from typing import List, Dict, Any
15
+
7
16
  from dotenv import load_dotenv
8
17
  from google.oauth2 import service_account
9
18
 
10
19
  load_dotenv()
11
20
 
21
# File-size threshold in MB. analyze_pdf_async sends PDFs smaller than this to
# the model in a single request; anything at or above it is split into chunks.
PDF_SIZE_THRESHOLD_MB = 30.0

# Target size in MB of each chunk when a large PDF is split.
PDF_CHUNK_TARGET_SIZE_MB = 30.0

# Maximum number of chunks that are analyzed concurrently.
MAX_CONCURRENT_CHUNKS = 5
29
+
12
30
 
13
31
  def analyze_pdf_with_claude(
14
32
  pdf_data: str, user_input: str, model_name: str = "claude-sonnet-4-5-20250929"
@@ -55,7 +73,7 @@ def analyze_pdf_with_claude(
55
73
 
56
74
 
57
75
  def analyze_pdf_with_gemini(
58
- pdf_data: str, user_input: str, model_name: str = "gemini-2.5-flash"
76
+ pdf_data: str, user_input: str, model_name: str = "gemini-2.5-flash", pdf_url: str = ""
59
77
  ):
60
78
  """
61
79
  Analyze a PDF file using Gemini API
@@ -100,61 +118,369 @@ def analyze_pdf_with_gemini(
100
118
  f"analyze_pdf_with_gemini============> input_token: {response.usage_metadata.prompt_token_count} output_token: {response.usage_metadata.candidates_token_count}",
101
119
  )
102
120
 
121
+ print(f"{pdf_url} success")
103
122
  return response.text
104
123
 
105
124
 
106
- def analyze_pdf(pdf_url: str, user_input: str):
125
def _analyze_single_chunk(
    chunk_data: str, page_range: str, user_input: str, model_name: str
) -> Dict[str, Any]:
    """
    Analyze a single PDF chunk with the given model.

    The model is asked to answer the user's question from this chunk only and
    to reply with the literal marker ``NOT_RELEVANT`` when the chunk has
    nothing to do with the question.

    Args:
        chunk_data: Base64-encoded PDF chunk data.
        page_range: Page-range label of the chunk (e.g. "page-001-015").
        user_input: The user's question.
        model_name: Model to use; must start with "gemini-" or "claude-".

    Returns:
        Dict with the keys "page_range" (str), "answer" (str, empty when the
        chunk is irrelevant or failed), "relevant" (bool) and "error"
        (str or None). This function never raises: every failure is reported
        through the "error" key.
    """

    def _result(answer: str = "", relevant: bool = False, error=None) -> Dict[str, Any]:
        # Single place that fixes the shape of the returned dict.
        return {
            "page_range": page_range,
            "answer": answer,
            "relevant": relevant,
            "error": error,
        }

    # Chunk-specific prompt (runtime text, intentionally kept in Chinese).
    chunk_prompt = f"""你正在閱讀一份大型 PDF 文件的其中一部分({page_range})。

使用者問題:{user_input}

請根據這個部分的內容回答問題:
- 如果這個部分包含與問題相關的資訊,請詳細回答
- 如果這個部分與問題完全無關,請只回答「NOT_RELEVANT」(不要回答其他內容)
- 回答時請標註資訊來源的頁碼"""

    try:
        if model_name.startswith("gemini-"):
            answer = analyze_pdf_with_gemini(chunk_data, chunk_prompt, model_name)
        elif model_name.startswith("claude-"):
            answer = analyze_pdf_with_claude(chunk_data, chunk_prompt, model_name)
        else:
            return _result(error=f"Unknown model type: {model_name}")

        # A missing or empty answer is a failure, not a relevant result:
        # report it explicitly instead of crashing on None.upper() or
        # counting an empty string as relevant content.
        if not answer:
            return _result(error=f"Model {model_name} returned no text")

        # The chunk is relevant unless the model emitted the marker.
        is_relevant = "NOT_RELEVANT" not in answer.upper()
        return _result(answer=answer if is_relevant else "", relevant=is_relevant)

    except Exception as e:
        import traceback

        traceback.print_exc()
        return _result(error=str(e))
183
+
184
+
185
async def analyze_pdf_chunks_parallel(
    chunks: List[tuple], user_input: str, model_name: str, max_concurrent: int = 5
) -> List[Dict[str, Any]]:
    """
    Run the per-chunk Q&A for several PDF chunks concurrently.

    Each chunk is base64-encoded and passed to the synchronous
    ``_analyze_single_chunk`` in the default executor, with at most
    ``max_concurrent`` chunks in flight at a time.

    Args:
        chunks: List of ``(chunk_bytes, page_range)`` tuples.
        user_input: The user's question.
        model_name: Model used for every chunk.
        max_concurrent: Maximum number of chunks analyzed at once; values
            below 1 are treated as 1 so the semaphore can never deadlock.

    Returns:
        One result dict per chunk, in the same order as ``chunks``. A chunk
        whose task raised is reported as a dict with ``relevant=False`` and
        the exception text under "error".
    """
    semaphore = asyncio.Semaphore(max(1, max_concurrent))
    # We are inside a coroutine, so the running loop is the right one to use
    # (asyncio.get_event_loop() is deprecated for this purpose).
    loop = asyncio.get_running_loop()

    async def analyze_with_semaphore(chunk_bytes: bytes, page_range: str):
        async with semaphore:
            # The model helpers expect base64 text, not raw bytes.
            chunk_data = base64.standard_b64encode(chunk_bytes).decode("utf-8")

            # The analysis function is synchronous; run it in the executor so
            # the event loop stays responsive.
            return await loop.run_in_executor(
                None,
                _analyze_single_chunk,
                chunk_data,
                page_range,
                user_input,
                model_name,
            )

    results = await asyncio.gather(
        *(analyze_with_semaphore(chunk_bytes, page_range) for chunk_bytes, page_range in chunks),
        return_exceptions=True,
    )

    # Convert captured exceptions into regular error results. BaseException is
    # checked because gather() can also hand back CancelledError, which does
    # not derive from Exception.
    processed_results = []
    for (_, page_range), result in zip(chunks, results):
        if isinstance(result, BaseException):
            processed_results.append(
                {
                    "page_range": page_range,
                    "answer": "",
                    "relevant": False,
                    "error": str(result),
                }
            )
        else:
            processed_results.append(result)

    return processed_results
109
243
 
110
- If PDF_ANALYZER_MODEL contains comma-separated models, it will try them in order,
111
- falling back to the next one if the previous fails.
244
+
245
def merge_chunk_results(
    chunk_results: List[Dict[str, Any]],
    user_input: str,
    model_name: str = "gemini-2.5-flash",
) -> str:
    """
    Merge the per-chunk answers into one answer, using an LLM when needed.

    Behaviour by number of relevant chunk results:
    - none: returns the collected chunk errors if there are any, otherwise a
      "nothing relevant found" message;
    - one: returns that chunk's answer unchanged (no LLM call);
    - several: asks a Gemini model to merge them; if that call fails or
      yields no text, the raw per-chunk answers are returned instead.

    Args:
        chunk_results: Result dicts with "page_range", "answer", "relevant"
            and "error" keys.
        user_input: The original user question.
        model_name: Model used for merging. Only a Gemini client is created
            here, so any name not starting with "gemini-" falls back to
            "gemini-2.5-flash".

    Returns:
        str: The merged answer, or an explanatory message.
    """
    relevant_results = [r for r in chunk_results if r.get("relevant", False)]

    if not relevant_results:
        # Nothing relevant: surface chunk errors if any occurred.
        error_msgs = [
            f"{r['page_range']}: {r['error']}" for r in chunk_results if r.get("error")
        ]
        if error_msgs:
            return "分析 PDF 時發生錯誤:\n" + "\n".join(error_msgs)
        return "在 PDF 文件中未找到與您問題相關的內容。"

    # A single relevant answer needs no merging.
    if len(relevant_results) == 1:
        return relevant_results[0]["answer"]

    combined_content = "\n\n".join(
        f"【{r['page_range']}】\n{r['answer']}" for r in relevant_results
    )

    merge_prompt = f"""以下是從一份大型 PDF 文件的不同部分擷取的回答,請統整這些資訊來回答使用者的問題。

使用者問題:{user_input}

各部分的回答:
{combined_content}

請統整以上資訊,提供一個完整、連貫的回答。如果不同部分有互補的資訊,請整合在一起。請保留頁碼引用。"""

    # Returned whenever the merge call cannot produce text.
    fallback = f"統整時發生錯誤,以下是各部分的回答:\n\n{combined_content}"

    # Callers pass their primary analysis model, which may be a Claude model;
    # this function only talks to Gemini, so substitute the default then.
    merge_model = model_name if model_name.startswith("gemini-") else "gemini-2.5-flash"

    try:
        # Text-only merge: no PDF is sent here. Imported lazily to keep module
        # load time low.
        from google import genai

        credentials = service_account.Credentials.from_service_account_file(
            os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_FASTAPI"),
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )

        client = genai.Client(
            credentials=credentials,
            project="scoop-386004",
            location="us-central1",
        )

        response = client.models.generate_content(
            model=merge_model,
            contents=[merge_prompt],
        )

        if hasattr(response, "usage_metadata"):
            print(
                f"merge_chunk_results============> input_token: {response.usage_metadata.prompt_token_count} output_token: {response.usage_metadata.candidates_token_count}",
            )

        # response.text can be empty/None; honour the declared str return.
        return response.text or fallback

    except Exception:
        import traceback

        traceback.print_exc()
        # Merge failed: return the concatenated per-chunk answers.
        return fallback
326
+
327
+
328
async def analyze_pdf_async(pdf_url: str, user_input: str) -> str:
    """
    Analyze a PDF asynchronously, choosing the strategy by file size.

    - Smaller than PDF_SIZE_THRESHOLD_MB: the whole file is sent to each model
      listed in the PDF_ANALYZER_MODEL environment variable (comma-separated,
      default "gemini-2.5-flash") in order, until one succeeds.
    - Otherwise: the file is split into chunks (reusing cached chunks when
      available), the chunks are analyzed in parallel with the first
      configured model only, and the per-chunk answers are merged.

    Args:
        pdf_url: URL of the PDF file.
        user_input: The user's question.

    Returns:
        str: The analysis result. Failures are returned as an error message
        string; this function does not raise.
    """
    try:
        loop = asyncio.get_running_loop()

        # 1. Download the PDF without blocking the event loop, and fail fast
        #    on HTTP errors instead of feeding an error page to the model.
        print(f"[analyze_pdf_async] 下載 PDF: {pdf_url}")
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(pdf_url)
        response.raise_for_status()
        pdf_content = response.content
        pdf_size_mb = len(pdf_content) / (1024 * 1024)
        print(f"[analyze_pdf_async] PDF 大小: {pdf_size_mb:.2f} MB")

        # Model configuration
        models_str = os.getenv("PDF_ANALYZER_MODEL", "gemini-2.5-flash")
        print(f"[analyze_pdf_async] 使用模型: {models_str}")
        models = [model.strip() for model in models_str.split(",")]
        primary_model = models[0]

        # 2. Small file: direct multimodal Q&A, trying each model in order.
        if pdf_size_mb < PDF_SIZE_THRESHOLD_MB:
            print(f"[analyze_pdf_async] 小檔模式 (< {PDF_SIZE_THRESHOLD_MB}MB)")
            pdf_data = base64.standard_b64encode(pdf_content).decode("utf-8")

            last_error = None
            for model in models:
                if model.startswith("gemini-"):
                    call = (analyze_pdf_with_gemini, pdf_data, user_input, model, pdf_url)
                elif model.startswith("claude-"):
                    call = (analyze_pdf_with_claude, pdf_data, user_input, model)
                else:
                    # Record the reason so the final message is not "None".
                    last_error = f"Unknown model type: {model}"
                    print(f"[analyze_pdf_async] Unknown model type: {model}, skipping")
                    continue

                try:
                    # The model helpers are synchronous; keep the loop free.
                    return await loop.run_in_executor(None, *call)
                except Exception as e:
                    import traceback

                    traceback.print_exc()
                    last_error = str(e)

            return f"分析 PDF 時所有模型都失敗。最後錯誤: {last_error}"

        # 3. Large file: split -> parallel Q&A -> merge
        print(f"[analyze_pdf_async] 大檔模式 (>= {PDF_SIZE_THRESHOLD_MB}MB)")

        # Imported lazily to keep module load time low.
        from botrun_flow_lang.langgraph_agents.agents.util.pdf_processor import (
            split_pdf_smart,
            get_pdf_page_count,
        )
        from botrun_flow_lang.langgraph_agents.agents.util.pdf_cache import (
            get_cache_key,
            check_cache,
            save_to_cache,
        )

        # 3.1 Look for previously cached chunks
        cache_key = get_cache_key(pdf_url)
        print(f"[analyze_pdf_async] 檢查快取: {cache_key}")
        cached_chunks = await check_cache(cache_key)

        if cached_chunks:
            # Cache hit: reuse the chunks as-is. No page total is derived from
            # the labels here; it is not needed below and labels such as
            # "page-001-all" are not numeric.
            print(f"[analyze_pdf_async] 使用快取: {len(cached_chunks)} 個切片")
            chunks = cached_chunks
        else:
            # 3.2 Cache miss: split the PDF
            print("[analyze_pdf_async] 切割 PDF...")
            chunks = split_pdf_smart(pdf_content, target_size_mb=PDF_CHUNK_TARGET_SIZE_MB)
            total_pages = get_pdf_page_count(pdf_content)
            print(
                f"[analyze_pdf_async] 切割完成: {len(chunks)} 個切片, 共 {total_pages} 頁"
            )

            # 3.3 Store the chunks for later requests
            print("[analyze_pdf_async] 存入快取...")
            await save_to_cache(
                cache_key=cache_key,
                chunks=chunks,
                original_url=pdf_url,
                original_size_mb=pdf_size_mb,
                total_pages=total_pages,
            )

        # 3.4 Parallel Q&A over all chunks (primary model only)
        print(f"[analyze_pdf_async] 開始平行問答 (最大並行: {MAX_CONCURRENT_CHUNKS})...")
        chunk_results = await analyze_pdf_chunks_parallel(
            chunks, user_input, primary_model, max_concurrent=MAX_CONCURRENT_CHUNKS
        )

        # Summary for the log
        relevant_count = sum(1 for r in chunk_results if r.get("relevant", False))
        error_count = sum(1 for r in chunk_results if r.get("error"))
        print(
            f"[analyze_pdf_async] 問答完成: {relevant_count}/{len(chunks)} 個切片有相關內容, "
            f"{error_count} 個錯誤"
        )

        # 3.5 Merge the per-chunk answers (synchronous LLM call -> executor)
        print("[analyze_pdf_async] 統整結果...")
        result = await loop.run_in_executor(
            None, merge_chunk_results, chunk_results, user_input, primary_model
        )
        print("[analyze_pdf_async] 完成")

        return result

    except Exception as e:
        import traceback

        traceback.print_exc()
        return f"分析 PDF {pdf_url} 時發生錯誤: {str(e)}"
454
+
455
+
456
def analyze_pdf(pdf_url: str, user_input: str) -> str:
    """
    Analyze a PDF file (synchronous wrapper around analyze_pdf_async).

    Kept for backward compatibility with synchronous callers.

    Args:
        pdf_url: URL of the PDF file.
        user_input: The user's question.

    Returns:
        str: The analysis result.
    """
    try:
        # Try to get the current event loop.
        loop = asyncio.get_event_loop()
        if loop.is_running():
            # A loop is already running in this thread, so it cannot be
            # re-entered: run the coroutine via asyncio.run in a worker thread
            # and block until it finishes.
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(
                    asyncio.run, analyze_pdf_async(pdf_url, user_input)
                )
                return future.result()
        else:
            return loop.run_until_complete(analyze_pdf_async(pdf_url, user_input))
    except RuntimeError:
        # Any RuntimeError raised inside the try block (for example when no
        # event loop is available) ends up here; run on a fresh event loop.
        return asyncio.run(analyze_pdf_async(pdf_url, user_input))
@@ -0,0 +1,250 @@
1
+ """
2
+ PDF 快取模組
3
+
4
+ 提供 PDF 切片的 GCS 快取功能,避免重複切割相同的 PDF 檔案。
5
+ 快取會自動在 7 天後過期(透過 GCS Lifecycle Rule)。
6
+ """
7
+
8
+ import hashlib
9
+ import json
10
+ from io import BytesIO
11
+ from typing import List, Tuple, Optional
12
+ from datetime import datetime
13
+
14
+ from botrun_flow_lang.services.storage.storage_factory import storage_store_factory
15
+
16
# Path prefix under which all cached PDF chunks are stored.
PDF_CACHE_PREFIX = "pdf-cache"

# Cache expiry in days (intended for the storage lifecycle rule); not
# referenced by the functions visible in this module.
PDF_CACHE_EXPIRY_DAYS = 7
21
+
22
+
23
def get_cache_key(pdf_url: str) -> str:
    """
    Derive the cache key for a PDF from its URL.

    Args:
        pdf_url: URL of the PDF file.

    Returns:
        str: The 32-character hexadecimal MD5 digest of the URL.
    """
    url_bytes = pdf_url.encode()
    digest = hashlib.md5(url_bytes)
    return digest.hexdigest()
34
+
35
+
36
def _get_cache_path(cache_key: str) -> str:
    """
    Build the cache directory path for a cache key.

    Args:
        cache_key: The cache key.

    Returns:
        str: Storage path of the form "pdf-cache/{cache_key}".
    """
    return "{}/{}".format(PDF_CACHE_PREFIX, cache_key)
47
+
48
+
49
def _get_metadata_path(cache_key: str) -> str:
    """Return the path of the metadata.json file inside the cache directory."""
    cache_dir = _get_cache_path(cache_key)
    return f"{cache_dir}/metadata.json"
52
+
53
+
54
def _get_chunk_path(cache_key: str, chunk_index: int) -> str:
    """Return the path of one chunk file; the index is zero-padded to three digits."""
    cache_dir = _get_cache_path(cache_key)
    file_name = f"chunk-{chunk_index:03d}.pdf"
    return f"{cache_dir}/{file_name}"
57
+
58
+
59
async def check_cache(cache_key: str) -> Optional[List[Tuple[bytes, str]]]:
    """
    Load cached PDF chunks from storage, if a complete cache entry exists.

    Args:
        cache_key: Cache key (as produced by get_cache_key).

    Returns:
        Optional[List[Tuple[bytes, str]]]: The cached ``(chunk_bytes,
        page_range)`` list, or None when the entry is missing, empty,
        incomplete, or any error occurs while reading it.
    """
    try:
        storage = storage_store_factory()
        metadata_path = _get_metadata_path(cache_key)

        # The metadata file marks a cache entry as present.
        metadata_exists = await storage.file_exists(metadata_path)
        if not metadata_exists:
            print(f"[pdf_cache] 快取不存在: {cache_key}")
            return None

        metadata_file = await storage.retrieve_file(metadata_path)
        if not metadata_file:
            print(f"[pdf_cache] 無法讀取 metadata: {cache_key}")
            return None

        raw_metadata = metadata_file.getvalue().decode("utf-8")
        metadata = json.loads(raw_metadata)
        chunk_count = metadata.get("chunk_count", 0)
        page_ranges = metadata.get("page_ranges", [])

        if chunk_count == 0:
            print(f"[pdf_cache] 快取無切片: {cache_key}")
            return None

        print(f"[pdf_cache] 找到快取: {cache_key}, {chunk_count} 個切片")

        # Load every chunk; a single missing chunk invalidates the entry.
        loaded = []
        for index in range(chunk_count):
            chunk_file = await storage.retrieve_file(_get_chunk_path(cache_key, index))

            if not chunk_file:
                print(f"[pdf_cache] 無法讀取切片 {index}: {cache_key}")
                return None

            chunk_bytes = chunk_file.getvalue()
            # Fall back to a generic label when metadata lists fewer ranges
            # than chunks.
            if index < len(page_ranges):
                label = page_ranges[index]
            else:
                label = f"chunk-{index:03d}"
            loaded.append((chunk_bytes, label))

        print(f"[pdf_cache] 成功載入快取: {cache_key}")
        return loaded

    except Exception as e:
        print(f"[pdf_cache] 檢查快取時發生錯誤: {e}")
        return None
114
+
115
+
116
async def save_to_cache(
    cache_key: str,
    chunks: List[Tuple[bytes, str]],
    original_url: str,
    original_size_mb: float,
    total_pages: int,
) -> bool:
    """
    Store PDF chunks and their metadata in the storage cache.

    Chunks are written first and the metadata file last, so an entry only
    becomes visible to check_cache once every chunk has been stored.

    Args:
        cache_key: The cache key.
        chunks: List of ``(chunk_bytes, page_range)`` tuples.
        original_url: URL of the original PDF.
        original_size_mb: Size of the original file in MB.
        total_pages: Total page count of the original PDF.

    Returns:
        bool: True when everything was stored, False on any failure
        (errors are printed, never raised).
    """
    try:
        # Local import: the module only imports `datetime` at file level.
        from datetime import timezone

        storage = storage_store_factory()

        # 1. Store every chunk
        page_ranges = []
        for i, (chunk_bytes, page_range) in enumerate(chunks):
            chunk_path = _get_chunk_path(cache_key, i)
            chunk_file = BytesIO(chunk_bytes)

            success, _ = await storage.store_file(
                chunk_path, chunk_file, public=False, content_type="application/pdf"
            )

            if not success:
                print(f"[pdf_cache] 無法存入切片 {i}: {cache_key}")
                return False

            page_ranges.append(page_range)

        # 2. Store the metadata.
        # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop
        # the tzinfo so the stored ISO string keeps its previous (naive) format.
        created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        metadata = {
            "original_url": original_url,
            "cache_key": cache_key,
            "chunk_count": len(chunks),
            "page_ranges": page_ranges,
            "original_size_mb": original_size_mb,
            "total_pages": total_pages,
            "created_at": created_at,
        }

        metadata_path = _get_metadata_path(cache_key)
        metadata_file = BytesIO(json.dumps(metadata, ensure_ascii=False).encode("utf-8"))

        success, _ = await storage.store_file(
            metadata_path, metadata_file, public=False, content_type="application/json"
        )

        if not success:
            print(f"[pdf_cache] 無法存入 metadata: {cache_key}")
            return False

        print(
            f"[pdf_cache] 成功存入快取: {cache_key}, "
            f"{len(chunks)} 個切片, {total_pages} 頁"
        )
        return True

    except Exception as e:
        print(f"[pdf_cache] 存入快取時發生錯誤: {e}")
        return False
186
+
187
+
188
async def get_cache_metadata(cache_key: str) -> Optional[dict]:
    """
    Read the metadata of a cache entry without loading any chunk content.

    Args:
        cache_key: The cache key.

    Returns:
        Optional[dict]: The parsed metadata, or None when the entry does not
        exist, cannot be read, or an error occurs.
    """
    try:
        storage = storage_store_factory()
        metadata_path = _get_metadata_path(cache_key)

        metadata_exists = await storage.file_exists(metadata_path)
        if not metadata_exists:
            return None

        metadata_file = await storage.retrieve_file(metadata_path)
        if not metadata_file:
            return None

        raw_metadata = metadata_file.getvalue().decode("utf-8")
        return json.loads(raw_metadata)

    except Exception as e:
        print(f"[pdf_cache] 讀取 metadata 時發生錯誤: {e}")
        return None
214
+
215
+
216
async def delete_cache(cache_key: str) -> bool:
    """
    Delete a cache entry (all chunk files, then the metadata file).

    Args:
        cache_key: The cache key.

    Returns:
        bool: True when the entry was deleted or did not exist, False when an
        error occurred.
    """
    try:
        storage = storage_store_factory()

        # The metadata tells us how many chunk files belong to the entry.
        metadata = await get_cache_metadata(cache_key)
        if not metadata:
            # Nothing cached: treated as success.
            return True

        chunk_total = metadata.get("chunk_count", 0)

        # Chunks first, metadata last.
        index = 0
        while index < chunk_total:
            await storage.delete_file(_get_chunk_path(cache_key, index))
            index += 1

        await storage.delete_file(_get_metadata_path(cache_key))

        print(f"[pdf_cache] 已刪除快取: {cache_key}")
        return True

    except Exception as e:
        print(f"[pdf_cache] 刪除快取時發生錯誤: {e}")
        return False
@@ -0,0 +1,204 @@
1
+ """
2
+ PDF 處理工具模組
3
+
4
+ 提供 PDF 切割等功能,用於處理大型 PDF 檔案。
5
+ 使用 pypdf(純 Python)實作,避免 C++ 庫的 segfault 問題。
6
+ """
7
+
8
+ import io
9
+ from typing import List, Tuple
10
+
11
+ from pypdf import PdfReader, PdfWriter
12
+
13
+
14
def get_pdf_size(pdf_content: bytes) -> int:
    """
    Return the size of a PDF in bytes.

    Args:
        pdf_content: Binary content of the PDF file.

    Returns:
        int: Number of bytes in ``pdf_content``.
    """
    size_in_bytes = len(pdf_content)
    return size_in_bytes
25
+
26
+
27
def get_pdf_size_mb(pdf_content: bytes) -> float:
    """
    Return the size of a PDF in megabytes (1 MB = 1024 * 1024 bytes).

    Args:
        pdf_content: Binary content of the PDF file.

    Returns:
        float: Size of ``pdf_content`` in MB.
    """
    bytes_per_mb = 1024 * 1024
    return len(pdf_content) / bytes_per_mb
38
+
39
+
40
def get_pdf_page_count(pdf_content: bytes) -> int:
    """
    Return the number of pages in a PDF.

    Args:
        pdf_content: Binary content of the PDF file.

    Returns:
        int: The page count, or 0 when the content cannot be read as a PDF
        (the error is printed, not raised).
    """
    try:
        stream = io.BytesIO(pdf_content)
        page_total = len(PdfReader(stream).pages)
    except Exception as e:
        print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
        return 0
    return page_total
56
+
57
+
58
def split_pdf_by_pages(
    pdf_content: bytes, pages_per_chunk: int = 15
) -> List[Tuple[bytes, str]]:
    """
    Split a PDF into chunks of a fixed number of pages.

    Args:
        pdf_content: Binary content of the PDF file.
        pages_per_chunk: Pages per chunk (default 15). Values below 1 cannot
            be used for splitting; the whole PDF is returned as one chunk.

    Returns:
        List[Tuple[bytes, str]]: ``(chunk_bytes, page_range)`` tuples with
        1-indexed, zero-padded labels, e.g.
        ``[(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]``.
        If splitting fails, the result is exactly one chunk holding the whole
        PDF, labelled "page-001-all". Empty content yields an empty list.
    """
    if not pdf_content:
        return []

    # Returned whenever the PDF cannot be split.
    whole_pdf = [(pdf_content, "page-001-all")]

    if pages_per_chunk < 1:
        print(
            f"[split_pdf_by_pages] invalid pages_per_chunk={pages_per_chunk}; "
            "returning the whole PDF as one chunk"
        )
        return whole_pdf

    chunks = []
    try:
        reader = PdfReader(io.BytesIO(pdf_content))
        total_pages = len(reader.pages)

        for start_idx in range(0, total_pages, pages_per_chunk):
            end_idx = min(start_idx + pages_per_chunk, total_pages)

            # Copy the pages of this range into a new PDF.
            writer = PdfWriter()
            for page_idx in range(start_idx, end_idx):
                writer.add_page(reader.pages[page_idx])

            output = io.BytesIO()
            writer.write(output)

            # start_idx is 0-based and end_idx is exclusive, so the 1-indexed
            # inclusive range is (start_idx + 1)..end_idx.
            page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
            chunks.append((output.getvalue(), page_range))

    except Exception as e:
        print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
        # Discard any chunks produced before the failure: returning them
        # together with the whole PDF would duplicate their pages.
        return whole_pdf

    return chunks
103
+
104
+
105
def calculate_optimal_chunk_size(
    pdf_content: bytes,
    target_size_mb: float = 4.0,
    min_pages: int = 5,
    max_pages: int = 30,
) -> int:
    """
    Estimate how many pages each chunk should contain.

    The estimate divides the target chunk size by the average page size
    (total size / page count) and clamps the result to
    ``[min_pages, max_pages]``. Because of the ``min_pages`` floor, a chunk
    may still exceed the target size.

    Args:
        pdf_content: Binary content of the PDF file.
        target_size_mb: Target chunk size in MB (default 4.0).
        min_pages: Lower bound for pages per chunk (default 5).
        max_pages: Upper bound for pages per chunk (default 30).

    Returns:
        int: Suggested pages per chunk; ``min_pages`` when the page count is 0.
    """
    total_size_mb = get_pdf_size_mb(pdf_content)
    total_pages = get_pdf_page_count(pdf_content)

    if total_pages == 0:
        return min_pages

    avg_page_size_mb = total_size_mb / total_pages

    # With a zero average size the division is undefined; use the upper bound.
    candidate = int(target_size_mb / avg_page_size_mb) if avg_page_size_mb > 0 else max_pages

    capped = min(candidate, max_pages)
    return max(min_pages, capped)
147
+
148
+
149
def split_pdf_smart(
    pdf_content: bytes, target_size_mb: float = 4.0
) -> List[Tuple[bytes, str]]:
    """
    Split a PDF into chunks sized for the target, re-splitting oversized ones.

    The pages-per-chunk value comes from calculate_optimal_chunk_size. After
    the first split, any chunk still larger than ``target_size_mb`` is split
    once more with half the pages per chunk (only when the initial value is
    above 5). There is a single re-split pass, so a chunk may remain above
    the target.

    Args:
        pdf_content: Binary content of the PDF file.
        target_size_mb: Target chunk size in MB (default 4.0).

    Returns:
        List[Tuple[bytes, str]]: ``(chunk_bytes, page_range)`` tuples whose
        labels refer to page numbers of the original document.
    """
    pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
    print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")

    # First pass
    chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)

    final_chunks = []
    for chunk_bytes, page_range in chunks:
        chunk_size_mb = get_pdf_size_mb(chunk_bytes)

        if chunk_size_mb <= target_size_mb or pages_per_chunk <= 5:
            final_chunks.append((chunk_bytes, page_range))
            continue

        print(
            f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
            f"超過目標 {target_size_mb}MB,進一步分割"
        )

        # First page of this chunk within the original document (1-indexed).
        start_page = int(page_range.replace("page-", "").split("-")[0])
        offset = start_page - 1

        # Re-split with half the pages and shift each sub-chunk's own
        # (chunk-relative) range by the offset. Using the ranges reported by
        # the splitter keeps the labels correct; recomputing them
        # arithmetically mislabels the result when the re-split fails.
        relabelled = []
        for sub_chunk, sub_range in split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2):
            try:
                rel_start, rel_end = (
                    int(part) for part in sub_range.replace("page-", "").split("-")
                )
            except ValueError:
                # Non-numeric label ("page-001-all"): the re-split fell back to
                # the unsplit chunk, so keep the original chunk and label.
                relabelled = []
                break
            relabelled.append(
                (sub_chunk, f"page-{offset + rel_start:03d}-{offset + rel_end:03d}")
            )

        if relabelled:
            final_chunks.extend(relabelled)
        else:
            final_chunks.append((chunk_bytes, page_range))

    return final_chunks
@@ -15,7 +15,9 @@ from langchain_core.runnables import RunnableConfig
15
15
 
16
16
  # Import necessary dependencies
17
17
  from botrun_flow_lang.models.nodes.utils import scrape_single_url
18
- from botrun_flow_lang.langgraph_agents.agents.util.pdf_analyzer import analyze_pdf
18
+ from botrun_flow_lang.langgraph_agents.agents.util.pdf_analyzer import (
19
+ analyze_pdf_async,
20
+ )
19
21
  from botrun_flow_lang.langgraph_agents.agents.util.img_util import analyze_imgs
20
22
  from botrun_flow_lang.langgraph_agents.agents.util.local_files import (
21
23
  upload_and_get_tmp_public_url,
@@ -76,6 +78,10 @@ async def chat_with_pdf(
76
78
  """
77
79
  Analyze a PDF file and answer questions about its content.
78
80
 
81
+ Supports intelligent processing based on file size:
82
+ - Small files (below the analyzer's size threshold): Direct multimodal analysis
83
+ - Large files (at or above the threshold): Split -> Parallel multimodal Q&A -> Merge results
84
+
79
85
  Args:
80
86
  pdf_url: The URL to the PDF file (can be generated using generate_tmp_public_url for local files)
81
87
  user_input: The user's question or instruction about the PDF content
@@ -91,7 +97,7 @@ async def chat_with_pdf(
91
97
  if not pdf_url.startswith("http"):
92
98
  pdf_url = upload_and_get_tmp_public_url(pdf_url, botrun_flow_lang_url, user_id)
93
99
 
94
- return analyze_pdf(pdf_url, user_input)
100
+ return await analyze_pdf_async(pdf_url, user_input)
95
101
 
96
102
 
97
103
  @mcp.tool()
@@ -175,10 +181,10 @@ async def generate_image(
175
181
  # 驗證必要參數
176
182
  if not user_id:
177
183
  logger.error("User ID not available")
178
- raise Exception("User ID not available")
184
+ return "User ID not available"
179
185
  if not botrun_flow_lang_url:
180
186
  logger.error("botrun_flow_lang_url not available")
181
- raise Exception("botrun_flow_lang_url not available")
187
+ return "botrun_flow_lang_url not available"
182
188
 
183
189
  # Check rate limit before generating image
184
190
  rate_limit_client = RateLimitClient()
@@ -195,10 +201,12 @@ async def generate_image(
195
201
  f"User {user_id} has reached daily limit of {daily_limit} image generations. "
196
202
  f"Current usage: {current_usage}. Please try again tomorrow."
197
203
  )
198
- raise BotrunRateLimitException(
199
- f"You have reached your daily limit of {daily_limit} image generations. "
204
+ return f"[Please tell user error] You have reached your daily limit of {daily_limit} image generations. " \
200
205
  f"Current usage: {current_usage}. Please try again tomorrow."
201
- )
206
+ # raise BotrunRateLimitException(
207
+ # f"You have reached your daily limit of {daily_limit} image generations. "
208
+ # f"Current usage: {current_usage}. Please try again tomorrow."
209
+ # )
202
210
 
203
211
  # 2. 使用 DALL-E 生成圖片
204
212
  dalle_wrapper = DallEAPIWrapper(
@@ -267,7 +275,8 @@ async def generate_tmp_public_url(
267
275
  logger.info(f"generate_tmp_public_url file_path: {file_path}")
268
276
 
269
277
  if not os.path.exists(file_path):
270
- raise FileNotFoundError(f"File not found: {file_path}")
278
+ return f"File not found: {file_path}"
279
+ # raise FileNotFoundError(f"File not found: {file_path}")
271
280
 
272
281
  return upload_and_get_tmp_public_url(file_path, botrun_flow_lang_url, user_id)
273
282
 
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  from typing import Union, List, Tuple
3
+ from datetime import datetime, timezone
3
4
  from google.cloud.exceptions import GoogleCloudError
4
5
  from botrun_flow_lang.constants import HATCH_SHARING_STORE_NAME, HATCH_STORE_NAME
5
6
  from botrun_flow_lang.services.base.firestore_base import FirestoreBase
@@ -27,6 +28,9 @@ class HatchFsStore(FirestoreBase):
27
28
 
28
29
  async def set_hatch(self, item: Hatch):
29
30
  try:
31
+ # Update updated_at timestamp with current UTC time
32
+ item.updated_at = datetime.now(timezone.utc).isoformat()
33
+
30
34
  # Proceed with saving the hatch
31
35
  doc_ref = self.collection.document(str(item.id))
32
36
  doc_ref.set(item.model_dump())
@@ -49,18 +53,29 @@ class HatchFsStore(FirestoreBase):
49
53
  return False
50
54
 
51
55
  async def get_hatches(
52
- self, user_id: str, offset: int = 0, limit: int = 20
56
+ self,
57
+ user_id: str,
58
+ offset: int = 0,
59
+ limit: int = 20,
60
+ sort_by: str = "updated_at",
61
+ order: str = "desc",
53
62
  ) -> Tuple[List[Hatch], str]:
54
63
  try:
55
- query = (
56
- self.collection.where(
57
- filter=firestore.FieldFilter("user_id", "==", user_id)
58
- )
59
- .order_by("name")
60
- .offset(offset)
61
- .limit(limit)
64
+ # Build base query
65
+ query = self.collection.where(
66
+ filter=firestore.FieldFilter("user_id", "==", user_id)
62
67
  )
63
68
 
69
+ # Add sorting
70
+ # Firestore direction: DESCENDING or ASCENDING
71
+ direction = (
72
+ firestore.Query.DESCENDING if order == "desc" else firestore.Query.ASCENDING
73
+ )
74
+ query = query.order_by(sort_by, direction=direction)
75
+
76
+ # Add pagination
77
+ query = query.offset(offset).limit(limit)
78
+
64
79
  docs = query.stream()
65
80
  hatches = [Hatch(**doc.to_dict()) for doc in docs]
66
81
  return hatches, ""
@@ -39,7 +39,11 @@ class StorageCsStore(StorageStore):
39
39
  {
40
40
  "action": {"type": "Delete"},
41
41
  "condition": {"age": 365, "matchesPrefix": ["tmp/"]},
42
- }
42
+ },
43
+ {
44
+ "action": {"type": "Delete"},
45
+ "condition": {"age": 7, "matchesPrefix": ["pdf-cache/"]},
46
+ },
43
47
  ]
44
48
 
45
49
  if not bucket.exists():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: botrun-flow-lang
3
- Version: 5.11.281
3
+ Version: 5.12.261
4
4
  Summary: A flow language for botrun
5
5
  Author-email: sebastian-hsu <sebastian.hsu@gmail.com>
6
6
  License: MIT
@@ -13,7 +13,7 @@ Requires-Python: <3.13,>=3.11
13
13
  Requires-Dist: aiohttp>=3.10.8
14
14
  Requires-Dist: anthropic>=0.61.0
15
15
  Requires-Dist: boto3>=1.40.3
16
- Requires-Dist: botrun-hatch>=5.10.82
16
+ Requires-Dist: botrun-hatch>=5.12.261
17
17
  Requires-Dist: botrun-log>=0.3.0
18
18
  Requires-Dist: cachetools>=5.5.2
19
19
  Requires-Dist: chardet>=5.2.0
@@ -47,6 +47,7 @@ Requires-Dist: pandas>=2.2.3
47
47
  Requires-Dist: pdfminer-six==20250506
48
48
  Requires-Dist: plotly>=6.0.0
49
49
  Requires-Dist: pydantic-settings>=2.5.2
50
+ Requires-Dist: pypdf==6.4.2
50
51
  Requires-Dist: python-multipart>=0.0.20
51
52
  Requires-Dist: pytz>=2024.2
52
53
  Requires-Dist: pyyaml>=6.0.2
@@ -8,9 +8,9 @@ botrun_flow_lang/api/auth_api.py,sha256=o_ThrZFcOMQieUcUJIF_B7rsyvbkCvOCjCjknl9G
8
8
  botrun_flow_lang/api/auth_utils.py,sha256=qE7RIPDnX30FPmhlgmlQNoVNkLU028x4SldVl6VC4KQ,6455
9
9
  botrun_flow_lang/api/botrun_back_api.py,sha256=mE2NSejaYIiE0L9GmNJbLc_FRWCy6BXlcqRwkB1kKmc,2397
10
10
  botrun_flow_lang/api/flow_api.py,sha256=DcxuoGE1OcbTgLSYKZ2SO9IdcH3UB5Ik3cVmX3v3-Po,108
11
- botrun_flow_lang/api/hatch_api.py,sha256=qZG-Wwi_8SHPuWNfbt-dhz-O41VYetTxrJzcVjHbJCo,15913
11
+ botrun_flow_lang/api/hatch_api.py,sha256=trenrAJt95ufjpAzwuTvcCoJdMR2x4EZCuWBup9e4hA,16984
12
12
  botrun_flow_lang/api/langgraph_api.py,sha256=zqu0xeTiy2Pr4UL6vvGqVVAy2KX3ZUn1uzcq-Tfb_aM,29291
13
- botrun_flow_lang/api/line_bot_api.py,sha256=__Rul_JWd7KwtgGIyziOKxD9PSconkjuN0dmI8JEwjg,54930
13
+ botrun_flow_lang/api/line_bot_api.py,sha256=INTWnI09LhRhQ6NcetHrHAQZU86eyoJ2zZ6g-KtX8MM,55500
14
14
  botrun_flow_lang/api/model_api.py,sha256=bXemey_XUUdylZwh7Z10eksoBWe9xSa8I9TEL7jIBtE,9483
15
15
  botrun_flow_lang/api/rate_limit_api.py,sha256=SkpjfvShHRdP5XJzy3DdrH4jLtdYAEHROGBMBkC9OIY,948
16
16
  botrun_flow_lang/api/routes.py,sha256=rd0IoMsteJT9BO3MQuyXirhPQbas6OeiKaEC8Yf2SZs,1570
@@ -23,7 +23,7 @@ botrun_flow_lang/api/version_api.py,sha256=Mcs7hKBP7T7nlHDaZS4U0dtOkNQqW0BtT62Iv
23
23
  botrun_flow_lang/api/youtube_api.py,sha256=R384jNRheMKnDyzvlLnbzackipZhiLYTZl4w4hB6vtw,753
24
24
  botrun_flow_lang/langgraph_agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  botrun_flow_lang/langgraph_agents/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py,sha256=fOZgHDsCA_EDTTGQFBmhGUhpfLB3m_N6YW2UHgMpKBg,6241
26
+ botrun_flow_lang/langgraph_agents/agents/agent_runner.py,sha256=1BRIbEi7_NoIx1BuVUPFA-fDHErm3nHpX65HsY7YpWU,6495
27
27
  botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py,sha256=4fK_hMoUAqcEYv7rrHbAx6PFsJ7UcvGI0G2OgWhVhnw,29972
28
28
  botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py,sha256=6fz-ewLQGacEx-uqGfF3-go9FdiioiMzW_sfANzYTcI,31182
29
29
  botrun_flow_lang/langgraph_agents/agents/agent_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -44,7 +44,9 @@ botrun_flow_lang/langgraph_agents/agents/util/img_util.py,sha256=FF5XUFCtYBul6DZ
44
44
  botrun_flow_lang/langgraph_agents/agents/util/local_files.py,sha256=ib3JVvuTgKhFj-D_8d5MH3vImFe6Fk1aMxyxNqjDWWc,13110
45
45
  botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py,sha256=Aw_ufAEBAqSENOtueemYtapxPVvbQ6HScedeWUZ8lS4,2556
46
46
  botrun_flow_lang/langgraph_agents/agents/util/model_utils.py,sha256=lCORhM77agNgmDxsA40XlUth-E8ThM5Kv-x0_DQnYrs,4811
47
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py,sha256=u318H8mXS-qx3-te8fBEHUoOv0hkgXMVdvxlfY80w6g,5091
47
+ botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py,sha256=R50ttYF9uun9aawjFzyFlTrXbM03HNbc-yanILdyRtM,15691
48
+ botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py,sha256=ptWaNtu_ls1piiBqQG2ZISjxWJ4gxMbIs7hvBdeas5k,7125
49
+ botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py,sha256=OgaloAzjoMmdymM3O1EiES7Ji6qqZtZPQmDT6HwmnYM,6040
48
50
  botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py,sha256=4ynmvT8yXbi4KFEVliXWffozB6fHd8jVueA3MmGKlD8,19073
49
51
  botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py,sha256=8xKoj9ABwqCA8dzWFmw-qXCXStgCx6hMc5mVBBJ3ZGw,1913
50
52
  botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py,sha256=hhiuxF6u92X8Mz1VLAreFrB-LyR-UOKR9Xxh1TBveTg,6623
@@ -56,7 +58,7 @@ botrun_flow_lang/llm_agent/llm_agent.py,sha256=Ae9YCK0R_zvSU38Oewo7_qbnBMYTaero6
56
58
  botrun_flow_lang/llm_agent/llm_agent_util.py,sha256=1slGk7LIYUylRjHvw4d92VAxD16uwiK4Efy0m_7tCJs,3048
57
59
  botrun_flow_lang/log/.gitignore,sha256=ZZ3Viy7hKc5dYybO_EM74DBeCqrwL7vmnECmsw--nVc,16
58
60
  botrun_flow_lang/mcp_server/__init__.py,sha256=P02XN_G9ALIeNgPwIsLPdKjGO7lk_XYOxME6NFrBrzE,208
59
- botrun_flow_lang/mcp_server/default_mcp.py,sha256=zkixaPFicuwJBx3B7DsQ9Drfj0FouPRcNY6Gm3B6y5E,28028
61
+ botrun_flow_lang/mcp_server/default_mcp.py,sha256=P38E6p6s9yT2U33hxlOMXG0iioUQ-JgJze-9zmlu5mY,28490
60
62
  botrun_flow_lang/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
63
  botrun_flow_lang/models/token_usage.py,sha256=hwGYZzklnTrcDCjNqCn03kddzS3VH-i1l0d5WnF6iqA,842
62
64
  botrun_flow_lang/models/nodes/utils.py,sha256=kN9RFOSHPvOZU7T4ZfVU6u2wkKgS03xtAKvjLOpryKM,6699
@@ -64,9 +66,9 @@ botrun_flow_lang/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
64
66
  botrun_flow_lang/services/base/firestore_base.py,sha256=Z561TzGvYOUmGKc3IQh03nsK8XHCa96Nlx5m23TySks,1045
65
67
  botrun_flow_lang/services/hatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
68
  botrun_flow_lang/services/hatch/hatch_factory.py,sha256=ObCcb_hTp8YFXRNXOsXtI_dND_dz7cusypF2tDnMqYY,271
67
- botrun_flow_lang/services/hatch/hatch_fs_store.py,sha256=U8y5ipP2TBux06-sXGddmvkKDtsndUXDJfS54q7PJp8,13092
69
+ botrun_flow_lang/services/hatch/hatch_fs_store.py,sha256=2wmRlFInjCD1RRRHVfX47xX74vih4zJBhfRcy5Uk7ew,13620
68
70
  botrun_flow_lang/services/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- botrun_flow_lang/services/storage/storage_cs_store.py,sha256=uGyYZ-2QVMhh29DwNKIEAp_Sdd_UV7LH3476MUbORTw,7730
71
+ botrun_flow_lang/services/storage/storage_cs_store.py,sha256=5NQekxfzjH6420QHi6HxLFHM5qY9kfiYis-dmsyuHO4,7896
70
72
  botrun_flow_lang/services/storage/storage_factory.py,sha256=Yn40nB79qoEvClksIRnRpQGojXT4J4q1ExBqb3ydets,354
71
73
  botrun_flow_lang/services/storage/storage_store.py,sha256=cb31kDJHNqVA4HyiyJJ1Pnyqv1n5nOkHMpPpA8tvgXg,1905
72
74
  botrun_flow_lang/services/user_setting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -95,6 +97,6 @@ botrun_flow_lang/utils/yaml_utils.py,sha256=1A6PSEE8TM0HSD_6l-fhUsjYnXJcrEKuPgot
95
97
  botrun_flow_lang/utils/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
98
  botrun_flow_lang/utils/clients/rate_limit_client.py,sha256=TRpA56OKrfYsoLoJ-TPYlC7Znp9s267-u6CX6BLyVko,8349
97
99
  botrun_flow_lang/utils/clients/token_verify_client.py,sha256=BtrfLvMe-DtS8UKeDhaIkVKDZHphZVP7kyqXn9jhXEc,5740
98
- botrun_flow_lang-5.11.281.dist-info/METADATA,sha256=BN-Ufk4sSRSkuAr3c8mIbKfh78gZX6yOMMxzq4oRSMo,6192
99
- botrun_flow_lang-5.11.281.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
100
- botrun_flow_lang-5.11.281.dist-info/RECORD,,
100
+ botrun_flow_lang-5.12.261.dist-info/METADATA,sha256=vtLwjT6C29pqio3IV9gFQeWSuVexlZuCs_bjkUmCw4s,6221
101
+ botrun_flow_lang-5.12.261.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
102
+ botrun_flow_lang-5.12.261.dist-info/RECORD,,