botrun-flow-lang 5.11.281__py3-none-any.whl → 5.12.261__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- botrun_flow_lang/api/hatch_api.py +28 -1
- botrun_flow_lang/api/line_bot_api.py +33 -25
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py +13 -9
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +368 -42
- botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -0
- botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -0
- botrun_flow_lang/mcp_server/default_mcp.py +17 -8
- botrun_flow_lang/services/hatch/hatch_fs_store.py +23 -8
- botrun_flow_lang/services/storage/storage_cs_store.py +5 -1
- {botrun_flow_lang-5.11.281.dist-info → botrun_flow_lang-5.12.261.dist-info}/METADATA +3 -2
- {botrun_flow_lang-5.11.281.dist-info → botrun_flow_lang-5.12.261.dist-info}/RECORD +12 -10
- {botrun_flow_lang-5.11.281.dist-info → botrun_flow_lang-5.12.261.dist-info}/WHEEL +0 -0
@@ -286,13 +286,40 @@ async def get_hatches(
     user_id: str,
     offset: int = Query(0, ge=0),
     limit: int = Query(20, ge=1, le=100),
+    sort_by: str = Query("updated_at", description="Field to sort by (name, updated_at)"),
+    order: str = Query("desc", regex="^(asc|desc)$", description="Sort order: asc or desc"),
     current_user: CurrentUser = Depends(verify_jwt_token),
     hatch_store=Depends(get_hatch_store),
 ):
+    """Get hatches for a user with sorting options.
+
+    Args:
+        user_id: User ID to get hatches for
+        offset: Pagination offset
+        limit: Maximum number of results (1-100)
+        sort_by: Field to sort by - only 'name' or 'updated_at' are supported (default: updated_at)
+        order: Sort order - 'asc' or 'desc' (default: desc for newest first)
+
+    Returns:
+        List of hatches sorted by the specified field
+
+    Raises:
+        HTTPException: 400 if sort_by field is not supported
+    """
     # Verify user permission to access hatches for the specified user_id
     verify_user_permission(current_user, user_id)
 
-
+    # Validate sort_by field - only allow fields with Firestore indexes
+    allowed_sort_fields = ["name", "updated_at"]
+    if sort_by not in allowed_sort_fields:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid sort_by field '{sort_by}'. Allowed fields: {', '.join(allowed_sort_fields)}",
+        )
+
+    hatches, error = await hatch_store.get_hatches(
+        user_id, offset, limit, sort_by, order
+    )
     if error:
         raise HTTPException(status_code=500, detail=error)
     return hatches
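For context, a minimal sketch of how a client might call the updated endpoint; the host, route path, and auth header here are assumptions for illustration, not taken from this diff:

    import httpx

    # Hypothetical request against the hatches listing endpoint, using the new
    # sort_by / order query parameters added in this release.
    resp = httpx.get(
        "https://bot.example.com/api/hatches",
        params={"user_id": "u123", "offset": 0, "limit": 20,
                "sort_by": "updated_at", "order": "desc"},
        headers={"Authorization": "Bearer <jwt>"},
    )
    resp.raise_for_status()
    hatches = resp.json()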
@@ -722,10 +722,11 @@ async def subsidy_webhook(request: Request):
            )
            responses.append(response)
 
+        # NOTE: The like/dislike feedback feature is temporarily disabled (2025-12-03); uncomment the code below if it is needed again
        # Handle postback events sent when a user presses the like/dislike feedback buttons
-        elif isinstance(event, PostbackEvent):
-
-
+        # elif isinstance(event, PostbackEvent):
+        #     await handle_feedback(event, line_bot_api)
+        #     responses.append("feedback_handled")
 
    return {"responses": responses}
 
@@ -963,34 +964,40 @@ async def handle_message(
            )
        )
 
+        # NOTE: The like/dislike feedback feature is temporarily disabled (2025-12-03); uncomment the code below if it is needed again
        # Use Quick Reply items as the like/dislike feedback buttons
-        quick_reply = QuickReply(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        )
+        # quick_reply = QuickReply(
+        #     items=[
+        #         QuickReplyItem(
+        #             action=PostbackAction(
+        #                 label="津好康,真是棒👍🏻",
+        #                 data="實用",
+        #                 display_text="津好康,真是棒👍🏻",
+        #             )
+        #         ),
+        #         QuickReplyItem(
+        #             action=PostbackAction(
+        #                 label="津可惜,不太實用😖",
+        #                 data="不實用",
+        #                 display_text="津可惜,不太實用😖",
+        #             )
+        #         ),
+        #     ]
+        # )
 
        if question_bubble:
            messages.append(FlexMessage(alt_text="相關問題", contents=question_bubble))
 
-        messages[-1].quick_reply = quick_reply
-
+        # messages[-1].quick_reply = quick_reply
+        logging.info(
+            f"[Line Bot Webhook: handle_message] start reply_message"
+        )
        await line_bot_api.reply_message(
            ReplyMessageRequest(reply_token=event.reply_token, messages=messages)
        )
+        logging.info(
+            f"[Line Bot Webhook: handle_message] end reply_message"
+        )
    except Exception as e:
        traceback.print_exc()
        logging.error(
@@ -1085,14 +1092,15 @@ async def get_reply_text(
 
    try:
        # Get the system instruction
-
+        # Not needed for now, because we call the cbh API directly and the prompt lives inside it
+        # system_instruction = get_subsidy_api_system_prompt()
 
        # Call the external API
        api_response = await call_subsidy_api(
            user_message=line_user_message,
            user_id=user_id,
            display_name=display_name,
-            system_instruction=system_instruction
+            # system_instruction=system_instruction
        )
 
        # Extract token usage information
@@ -71,15 +71,19 @@ async def langgraph_runner(
    # Set the new recursion_limit to (multiplier + 1) * MAX_RECURSION_LIMIT
    config["recursion_limit"] = (multiplier + 1) * MAX_RECURSION_LIMIT
 
-
-
-
-
-
-
-
-
-
+    try:
+        async for event in graph.astream_events(
+            invoke_state,
+            config,
+            version="v2",
+        ):
+            yield event
+    except Exception as e:
+        # Catch SSE stream read errors (e.g. httpcore.ReadError)
+        import logging
+        logging.error(f"Error reading SSE stream: {e}", exc_info=True)
+        # Yield an error event so the caller knows about the failure
+        yield {"error": f"SSE stream error: {str(e)}"}
 
 
# graph is a CompiledStateGraph; the type is not annotated because loading the import takes 0.5 seconds
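As an illustrative sketch (not part of the diff) of how a caller could react to the new error events; the runner's exact signature is an assumption here:

    # Hypothetical consumer of langgraph_runner; on an SSE read failure the
    # generator now yields {"error": ...} instead of raising.
    async def run_and_collect(graph, invoke_state, config):
        events = []
        async for event in langgraph_runner(graph, invoke_state, config):
            if isinstance(event, dict) and "error" in event:
                print("stream aborted:", event["error"])
                break
            events.append(event)
        return events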
@@ -1,14 +1,32 @@
+"""
+PDF analysis module
+
+Provides PDF file analysis, supporting:
+- Small files (< 5MB): direct multimodal Q&A
+- Large files (>= 5MB): compress -> split -> parallel multimodal Q&A -> merge results with an LLM
+"""
+
 import anthropic
+import asyncio
 import base64
 import httpx
-
-
 import os
+from typing import List, Dict, Any
+
 from dotenv import load_dotenv
 from google.oauth2 import service_account
 
 load_dotenv()
 
+# File size threshold (MB)
+PDF_SIZE_THRESHOLD_MB = 30.0
+
+# Target chunk size (MB)
+PDF_CHUNK_TARGET_SIZE_MB = 30.0
+
+# Maximum number of parallel Q&A requests
+MAX_CONCURRENT_CHUNKS = 5
+
 
 def analyze_pdf_with_claude(
     pdf_data: str, user_input: str, model_name: str = "claude-sonnet-4-5-20250929"
@@ -55,7 +73,7 @@ def analyze_pdf_with_claude(
 
 
 def analyze_pdf_with_gemini(
-    pdf_data: str, user_input: str, model_name: str = "gemini-2.5-flash"
+    pdf_data: str, user_input: str, model_name: str = "gemini-2.5-flash", pdf_url: str = ""
 ):
     """
     Analyze a PDF file using Gemini API
@@ -100,61 +118,369 @@ def analyze_pdf_with_gemini(
        f"analyze_pdf_with_gemini============> input_token: {response.usage_metadata.prompt_token_count} output_token: {response.usage_metadata.candidates_token_count}",
    )
 
+    print(f"{pdf_url} success")
    return response.text
 
 
-def
+def _analyze_single_chunk(
+    chunk_data: str, page_range: str, user_input: str, model_name: str
+) -> Dict[str, Any]:
+    """
+    Analyze a single PDF chunk
+
+    Args:
+        chunk_data: Base64-encoded PDF chunk data
+        page_range: Page range string (e.g., "page-001-015")
+        user_input: The user's question
+        model_name: Name of the model to use
+
+    Returns:
+        Dict: {"page_range": str, "answer": str, "relevant": bool, "error": str|None}
+    """
+    # Build the chunk-specific prompt
+    chunk_prompt = f"""你正在閱讀一份大型 PDF 文件的其中一部分({page_range})。
+
+使用者問題:{user_input}
+
+請根據這個部分的內容回答問題:
+- 如果這個部分包含與問題相關的資訊,請詳細回答
+- 如果這個部分與問題完全無關,請只回答「NOT_RELEVANT」(不要回答其他內容)
+- 回答時請標註資訊來源的頁碼"""
+
+    try:
+        if model_name.startswith("gemini-"):
+            answer = analyze_pdf_with_gemini(chunk_data, chunk_prompt, model_name)
+        elif model_name.startswith("claude-"):
+            answer = analyze_pdf_with_claude(chunk_data, chunk_prompt, model_name)
+        else:
+            return {
+                "page_range": page_range,
+                "answer": "",
+                "relevant": False,
+                "error": f"Unknown model type: {model_name}",
+            }
+
+        # Determine whether this chunk is relevant
+        is_relevant = "NOT_RELEVANT" not in answer.upper()
+
+        return {
+            "page_range": page_range,
+            "answer": answer if is_relevant else "",
+            "relevant": is_relevant,
+            "error": None,
+        }
+
+    except Exception as e:
+        import traceback
+
+        traceback.print_exc()
+        return {
+            "page_range": page_range,
+            "answer": "",
+            "relevant": False,
+            "error": str(e),
+        }
+
+
+async def analyze_pdf_chunks_parallel(
+    chunks: List[tuple], user_input: str, model_name: str, max_concurrent: int = 5
+) -> List[Dict[str, Any]]:
     """
-
+    Run Q&A over multiple PDF chunks in parallel
+
+    Args:
+        chunks: List of chunks [(chunk_bytes, page_range), ...]
+        user_input: The user's question
+        model_name: Name of the model to use
+        max_concurrent: Maximum number of parallel requests
+
+    Returns:
+        List[Dict]: The answer result for each chunk
+    """
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def analyze_with_semaphore(chunk_bytes: bytes, page_range: str):
+        async with semaphore:
+            # Convert the bytes to base64
+            chunk_data = base64.standard_b64encode(chunk_bytes).decode("utf-8")
+
+            # Run the synchronous function via run_in_executor
+            loop = asyncio.get_event_loop()
+            return await loop.run_in_executor(
+                None,
+                _analyze_single_chunk,
+                chunk_data,
+                page_range,
+                user_input,
+                model_name,
+            )
+
+    # Create all the tasks
+    tasks = [
+        analyze_with_semaphore(chunk_bytes, page_range)
+        for chunk_bytes, page_range in chunks
+    ]
+
+    # Run them in parallel
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Handle exceptions
+    processed_results = []
+    for i, result in enumerate(results):
+        if isinstance(result, Exception):
+            processed_results.append(
+                {
+                    "page_range": chunks[i][1],
+                    "answer": "",
+                    "relevant": False,
+                    "error": str(result),
+                }
+            )
+        else:
+            processed_results.append(result)
+
+    return processed_results
 
-
-
+
+def merge_chunk_results(
+    chunk_results: List[Dict[str, Any]],
+    user_input: str,
+    model_name: str = "gemini-2.5-flash",
+) -> str:
+    """
+    Use an LLM to merge the answers from multiple chunks
 
     Args:
-
-    user_input:
+        chunk_results: List of chunk answer results
+        user_input: The original user question
+        model_name: Model used for merging
+
+    Returns:
+        str: The merged answer
+    """
+    # Keep only the relevant answers
+    relevant_results = [r for r in chunk_results if r.get("relevant", False)]
+
+    if not relevant_results:
+        # No relevant content was found
+        error_results = [r for r in chunk_results if r.get("error")]
+        if error_results:
+            error_msgs = [f"{r['page_range']}: {r['error']}" for r in error_results]
+            return f"分析 PDF 時發生錯誤:\n" + "\n".join(error_msgs)
+        return "在 PDF 文件中未找到與您問題相關的內容。"
+
+    # Only one relevant result; return it directly
+    if len(relevant_results) == 1:
+        return relevant_results[0]["answer"]
+
+    # Multiple relevant results; they need to be merged
+    combined_content = "\n\n".join(
+        [
+            f"【{r['page_range']}】\n{r['answer']}"
+            for r in relevant_results
+        ]
+    )
+
+    merge_prompt = f"""以下是從一份大型 PDF 文件的不同部分擷取的回答,請統整這些資訊來回答使用者的問題。
+
+使用者問題:{user_input}
+
+各部分的回答:
+{combined_content}
+
+請統整以上資訊,提供一個完整、連貫的回答。如果不同部分有互補的資訊,請整合在一起。請保留頁碼引用。"""
+
+    try:
+        # Merge with an LLM (no PDF is passed here; this is plain-text merging)
+        from google import genai
+
+        credentials = service_account.Credentials.from_service_account_file(
+            os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_FASTAPI"),
+            scopes=["https://www.googleapis.com/auth/cloud-platform"],
+        )
+
+        client = genai.Client(
+            credentials=credentials,
+            project="scoop-386004",
+            location="us-central1",
+        )
+
+        response = client.models.generate_content(
+            model=model_name,
+            contents=[merge_prompt],
+        )
+
+        if hasattr(response, "usage_metadata"):
+            print(
+                f"merge_chunk_results============> input_token: {response.usage_metadata.prompt_token_count} output_token: {response.usage_metadata.candidates_token_count}",
+            )
+
+        return response.text
+
+    except Exception as e:
+        import traceback
+
+        traceback.print_exc()
+        # Merging failed; return the combined content directly
+        return f"統整時發生錯誤,以下是各部分的回答:\n\n{combined_content}"
+
+
+async def analyze_pdf_async(pdf_url: str, user_input: str) -> str:
+    """
+    Asynchronously analyze a PDF file (smart processing strategy)
+
+    Automatically selects a processing strategy based on file size:
+    - < 5MB: direct multimodal Q&A
+    - >= 5MB: compress -> split -> parallel multimodal Q&A -> merge results with an LLM
+
+    Args:
+        pdf_url: URL of the PDF file
+        user_input: The user's question
 
     Returns:
-    str:
+        str: The analysis result
     """
     try:
-        #
-
+        # 1. Download the PDF
+        print(f"[analyze_pdf_async] 下載 PDF: {pdf_url}")
+        pdf_content = httpx.get(pdf_url, timeout=60.0).content
+        pdf_size_mb = len(pdf_content) / (1024 * 1024)
+        print(f"[analyze_pdf_async] PDF 大小: {pdf_size_mb:.2f} MB")
 
-        #
+        # Get the model configuration
         models_str = os.getenv("PDF_ANALYZER_MODEL", "gemini-2.5-flash")
-        print(f"[
+        print(f"[analyze_pdf_async] 使用模型: {models_str}")
         models = [model.strip() for model in models_str.split(",")]
+        primary_model = models[0]
+
+        # 2. Decide the processing strategy
+        if pdf_size_mb < PDF_SIZE_THRESHOLD_MB:
+            # Small file: direct multimodal Q&A
+            print(f"[analyze_pdf_async] 小檔模式 (< {PDF_SIZE_THRESHOLD_MB}MB)")
+            pdf_data = base64.standard_b64encode(pdf_content).decode("utf-8")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Try every configured model in turn
+            last_error = None
+            for model in models:
+                try:
+                    if model.startswith("gemini-"):
+                        return analyze_pdf_with_gemini(pdf_data, user_input, model, pdf_url)
+                    elif model.startswith("claude-"):
+                        return analyze_pdf_with_claude(pdf_data, user_input, model)
+                except Exception as e:
+                    import traceback
+
+                    traceback.print_exc()
+                    last_error = str(e)
                     continue
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+            return f"分析 PDF 時所有模型都失敗。最後錯誤: {last_error}"
+
+        # 3. Large file: compress -> split -> parallel Q&A -> merge
+        print(f"[analyze_pdf_async] 大檔模式 (>= {PDF_SIZE_THRESHOLD_MB}MB)")
+
+        # Deferred imports to speed up module loading
+        from botrun_flow_lang.langgraph_agents.agents.util.pdf_processor import (
+            split_pdf_smart,
+            get_pdf_page_count,
+        )
+        from botrun_flow_lang.langgraph_agents.agents.util.pdf_cache import (
+            get_cache_key,
+            check_cache,
+            save_to_cache,
         )
 
+        # 3.1 Check the cache
+        cache_key = get_cache_key(pdf_url)
+        print(f"[analyze_pdf_async] 檢查快取: {cache_key}")
+        cached_chunks = await check_cache(cache_key)
+
+        if cached_chunks:
+            # Cache hit; use it directly
+            print(f"[analyze_pdf_async] 使用快取: {len(cached_chunks)} 個切片")
+            chunks = cached_chunks
+            total_pages = sum(
+                int(pr.split("-")[-1]) - int(pr.split("-")[-2]) + 1
+                for _, pr in chunks
+                if pr.startswith("page-")
+            ) if chunks else 0
+        else:
+            # Cache miss; split the PDF and then store the chunks in the cache
+
+            # 3.2 Split
+            print("[analyze_pdf_async] 切割 PDF...")
+            chunks = split_pdf_smart(pdf_content, target_size_mb=PDF_CHUNK_TARGET_SIZE_MB)
+            total_pages = get_pdf_page_count(pdf_content)
+            print(
+                f"[analyze_pdf_async] 切割完成: {len(chunks)} 個切片, 共 {total_pages} 頁"
+            )
+
+            # 3.3 Save to the cache
+            print("[analyze_pdf_async] 存入快取...")
+            await save_to_cache(
+                cache_key=cache_key,
+                chunks=chunks,
+                original_url=pdf_url,
+                original_size_mb=pdf_size_mb,
+                total_pages=total_pages,
+            )
+
+        # 3.3 Parallel Q&A
+        print(f"[analyze_pdf_async] 開始平行問答 (最大並行: {MAX_CONCURRENT_CHUNKS})...")
+        chunk_results = await analyze_pdf_chunks_parallel(
+            chunks, user_input, primary_model, max_concurrent=MAX_CONCURRENT_CHUNKS
+        )
+
+        # Summarize the results
+        relevant_count = sum(1 for r in chunk_results if r.get("relevant", False))
+        error_count = sum(1 for r in chunk_results if r.get("error"))
+        print(
+            f"[analyze_pdf_async] 問答完成: {relevant_count}/{len(chunks)} 個切片有相關內容, "
+            f"{error_count} 個錯誤"
+        )
+
+        # 3.4 Merge the results
+        print("[analyze_pdf_async] 統整結果...")
+        result = merge_chunk_results(chunk_results, user_input, primary_model)
+        print("[analyze_pdf_async] 完成")
+
+        return result
+
     except Exception as e:
-
-
+        import traceback
+
+        traceback.print_exc()
+        return f"分析 PDF {pdf_url} 時發生錯誤: {str(e)}"
+
+
+def analyze_pdf(pdf_url: str, user_input: str) -> str:
+    """
+    Analyze a PDF file (synchronous wrapper function)
+
+    This is a synchronous function that internally creates an event loop to run the asynchronous analyze_pdf_async.
+    The synchronous interface is kept for backward compatibility.
+
+    Args:
+        pdf_url: URL of the PDF file
+        user_input: The user's question
+
+    Returns:
+        str: The analysis result
+    """
+    try:
+        # Try to get the existing event loop
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            # If we are already inside an event loop, create a new task
+            import concurrent.futures
+
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                future = executor.submit(
+                    asyncio.run, analyze_pdf_async(pdf_url, user_input)
+                )
+                return future.result()
+        else:
+            return loop.run_until_complete(analyze_pdf_async(pdf_url, user_input))
+    except RuntimeError:
+        # There is no event loop; create a new one
+        return asyncio.run(analyze_pdf_async(pdf_url, user_input))
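As a usage sketch (not part of the diff), this is roughly how the new entry points could be called; the URL and question are illustrative, and PDF_ANALYZER_MODEL plus the Google credentials environment variable must already be configured as the module expects:

    import asyncio
    from botrun_flow_lang.langgraph_agents.agents.util.pdf_analyzer import (
        analyze_pdf,
        analyze_pdf_async,
    )

    # Async path.
    answer = asyncio.run(
        analyze_pdf_async("https://example.com/big-report.pdf", "請摘要這份文件的重點")
    )

    # Synchronous wrapper kept for backward compatibility.
    same_answer = analyze_pdf("https://example.com/big-report.pdf", "請摘要這份文件的重點")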
@@ -0,0 +1,250 @@
+"""
+PDF cache module
+
+Provides GCS caching for PDF chunks to avoid re-splitting the same PDF file.
+Cache entries expire automatically after 7 days (via a GCS lifecycle rule).
+"""
+
+import hashlib
+import json
+from io import BytesIO
+from typing import List, Tuple, Optional
+from datetime import datetime
+
+from botrun_flow_lang.services.storage.storage_factory import storage_store_factory
+
+# Cache directory prefix
+PDF_CACHE_PREFIX = "pdf-cache"
+
+# Cache expiry in days (used for the lifecycle rule)
+PDF_CACHE_EXPIRY_DAYS = 7
+
+
+def get_cache_key(pdf_url: str) -> str:
+    """
+    Generate a cache key (hash) from the PDF URL
+
+    Args:
+        pdf_url: URL of the PDF file
+
+    Returns:
+        str: A 32-character MD5 hash
+    """
+    return hashlib.md5(pdf_url.encode()).hexdigest()
+
+
+def _get_cache_path(cache_key: str) -> str:
+    """
+    Get the cache directory path
+
+    Args:
+        cache_key: The cache key
+
+    Returns:
+        str: GCS path in the form "pdf-cache/{cache_key}"
+    """
+    return f"{PDF_CACHE_PREFIX}/{cache_key}"
+
+
+def _get_metadata_path(cache_key: str) -> str:
+    """Get the metadata file path"""
+    return f"{_get_cache_path(cache_key)}/metadata.json"
+
+
+def _get_chunk_path(cache_key: str, chunk_index: int) -> str:
+    """Get the chunk file path"""
+    return f"{_get_cache_path(cache_key)}/chunk-{chunk_index:03d}.pdf"
+
+
+async def check_cache(cache_key: str) -> Optional[List[Tuple[bytes, str]]]:
+    """
+    Check whether a cache entry exists in GCS
+
+    Args:
+        cache_key: The cache key (from get_cache_key)
+
+    Returns:
+        Optional[List[Tuple[bytes, str]]]: The list of chunks if cached, otherwise None
+    """
+    try:
+        storage = storage_store_factory()
+        metadata_path = _get_metadata_path(cache_key)
+
+        # Check whether the metadata file exists
+        if not await storage.file_exists(metadata_path):
+            print(f"[pdf_cache] 快取不存在: {cache_key}")
+            return None
+
+        # Read the metadata
+        metadata_file = await storage.retrieve_file(metadata_path)
+        if not metadata_file:
+            print(f"[pdf_cache] 無法讀取 metadata: {cache_key}")
+            return None
+
+        metadata = json.loads(metadata_file.getvalue().decode("utf-8"))
+        chunk_count = metadata.get("chunk_count", 0)
+        page_ranges = metadata.get("page_ranges", [])
+
+        if chunk_count == 0:
+            print(f"[pdf_cache] 快取無切片: {cache_key}")
+            return None
+
+        print(f"[pdf_cache] 找到快取: {cache_key}, {chunk_count} 個切片")
+
+        # Read all the chunks
+        chunks = []
+        for i in range(chunk_count):
+            chunk_path = _get_chunk_path(cache_key, i)
+            chunk_file = await storage.retrieve_file(chunk_path)
+
+            if not chunk_file:
+                print(f"[pdf_cache] 無法讀取切片 {i}: {cache_key}")
+                return None  # The cache is incomplete; give up on using it
+
+            chunk_bytes = chunk_file.getvalue()
+            page_range = page_ranges[i] if i < len(page_ranges) else f"chunk-{i:03d}"
+            chunks.append((chunk_bytes, page_range))
+
+        print(f"[pdf_cache] 成功載入快取: {cache_key}")
+        return chunks
+
+    except Exception as e:
+        print(f"[pdf_cache] 檢查快取時發生錯誤: {e}")
+        return None
+
+
+async def save_to_cache(
+    cache_key: str,
+    chunks: List[Tuple[bytes, str]],
+    original_url: str,
+    original_size_mb: float,
+    total_pages: int,
+) -> bool:
+    """
+    Save the chunks to the GCS cache
+
+    Args:
+        cache_key: The cache key
+        chunks: List of chunks [(chunk_bytes, page_range), ...]
+        original_url: The original PDF URL
+        original_size_mb: Original file size (MB)
+        total_pages: Total number of pages
+
+    Returns:
+        bool: Whether the chunks were saved to the cache successfully
+    """
+    try:
+        storage = storage_store_factory()
+
+        # 1. Save all the chunks
+        page_ranges = []
+        for i, (chunk_bytes, page_range) in enumerate(chunks):
+            chunk_path = _get_chunk_path(cache_key, i)
+            chunk_file = BytesIO(chunk_bytes)
+
+            success, _ = await storage.store_file(
+                chunk_path, chunk_file, public=False, content_type="application/pdf"
+            )
+
+            if not success:
+                print(f"[pdf_cache] 無法存入切片 {i}: {cache_key}")
+                return False
+
+            page_ranges.append(page_range)
+
+        # 2. Save the metadata
+        metadata = {
+            "original_url": original_url,
+            "cache_key": cache_key,
+            "chunk_count": len(chunks),
+            "page_ranges": page_ranges,
+            "original_size_mb": original_size_mb,
+            "total_pages": total_pages,
+            "created_at": datetime.utcnow().isoformat(),
+        }
+
+        metadata_path = _get_metadata_path(cache_key)
+        metadata_file = BytesIO(json.dumps(metadata, ensure_ascii=False).encode("utf-8"))
+
+        success, _ = await storage.store_file(
+            metadata_path, metadata_file, public=False, content_type="application/json"
+        )
+
+        if not success:
+            print(f"[pdf_cache] 無法存入 metadata: {cache_key}")
+            return False
+
+        print(
+            f"[pdf_cache] 成功存入快取: {cache_key}, "
+            f"{len(chunks)} 個切片, {total_pages} 頁"
+        )
+        return True
+
+    except Exception as e:
+        print(f"[pdf_cache] 存入快取時發生錯誤: {e}")
+        return False
+
+
+async def get_cache_metadata(cache_key: str) -> Optional[dict]:
+    """
+    Get the cache metadata (without loading the chunk contents)
+
+    Args:
+        cache_key: The cache key
+
+    Returns:
+        Optional[dict]: The metadata dict, or None
+    """
+    try:
+        storage = storage_store_factory()
+        metadata_path = _get_metadata_path(cache_key)
+
+        if not await storage.file_exists(metadata_path):
+            return None
+
+        metadata_file = await storage.retrieve_file(metadata_path)
+        if not metadata_file:
+            return None
+
+        return json.loads(metadata_file.getvalue().decode("utf-8"))
+
+    except Exception as e:
+        print(f"[pdf_cache] 讀取 metadata 時發生錯誤: {e}")
+        return None
+
+
+async def delete_cache(cache_key: str) -> bool:
+    """
+    Delete a cache entry
+
+    Args:
+        cache_key: The cache key
+
+    Returns:
+        bool: Whether the deletion succeeded
+    """
+    try:
+        storage = storage_store_factory()
+
+        # Read the metadata first to get the chunk count
+        metadata = await get_cache_metadata(cache_key)
+        if not metadata:
+            return True  # The cache does not exist; treat this as success
+
+        chunk_count = metadata.get("chunk_count", 0)
+
+        # Delete all the chunks
+        for i in range(chunk_count):
+            chunk_path = _get_chunk_path(cache_key, i)
+            await storage.delete_file(chunk_path)
+
+        # Delete the metadata
+        metadata_path = _get_metadata_path(cache_key)
+        await storage.delete_file(metadata_path)
+
+        print(f"[pdf_cache] 已刪除快取: {cache_key}")
+        return True
+
+    except Exception as e:
+        print(f"[pdf_cache] 刪除快取時發生錯誤: {e}")
+        return False
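A hedged sketch of how the cache helpers fit together; the chunk data, size, and page count below are illustrative, and a GCS-backed storage store must be configured for storage_store_factory():

    from botrun_flow_lang.langgraph_agents.agents.util.pdf_cache import (
        get_cache_key,
        check_cache,
        save_to_cache,
    )

    async def get_or_store_chunks(pdf_url: str, chunks):
        key = get_cache_key(pdf_url)          # MD5 of the URL
        cached = await check_cache(key)       # None on a miss or incomplete cache
        if cached is not None:
            return cached
        await save_to_cache(
            cache_key=key,
            chunks=chunks,                    # [(chunk_bytes, "page-001-015"), ...]
            original_url=pdf_url,
            original_size_mb=42.0,            # illustrative value
            total_pages=120,                  # illustrative value
        )
        return chunks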
@@ -0,0 +1,204 @@
+"""
+PDF processing utility module
+
+Provides PDF splitting and related helpers for handling large PDF files.
+Implemented with pypdf (pure Python) to avoid segfault issues with C++ libraries.
+"""
+
+import io
+from typing import List, Tuple
+
+from pypdf import PdfReader, PdfWriter
+
+
+def get_pdf_size(pdf_content: bytes) -> int:
+    """
+    Get the PDF file size (bytes)
+
+    Args:
+        pdf_content: Binary content of the PDF file
+
+    Returns:
+        int: File size (bytes)
+    """
+    return len(pdf_content)
+
+
+def get_pdf_size_mb(pdf_content: bytes) -> float:
+    """
+    Get the PDF file size (MB)
+
+    Args:
+        pdf_content: Binary content of the PDF file
+
+    Returns:
+        float: File size (MB)
+    """
+    return len(pdf_content) / (1024 * 1024)
+
+
+def get_pdf_page_count(pdf_content: bytes) -> int:
+    """
+    Get the total number of pages in the PDF
+
+    Args:
+        pdf_content: Binary content of the PDF file
+
+    Returns:
+        int: Total number of pages
+    """
+    try:
+        reader = PdfReader(io.BytesIO(pdf_content))
+        return len(reader.pages)
+    except Exception as e:
+        print(f"[get_pdf_page_count] 無法讀取 PDF 頁數: {e}")
+        return 0
+
+
+def split_pdf_by_pages(
+    pdf_content: bytes, pages_per_chunk: int = 15
+) -> List[Tuple[bytes, str]]:
+    """
+    Split a PDF by page count
+
+    Args:
+        pdf_content: Binary content of the PDF file
+        pages_per_chunk: Number of pages per chunk (default 15)
+
+    Returns:
+        List[Tuple[bytes, str]]: List of chunks, each element being (chunk content, page range string)
+        e.g. [(chunk_bytes, "page-001-015"), (chunk_bytes, "page-016-030"), ...]
+    """
+    chunks = []
+
+    try:
+        reader = PdfReader(io.BytesIO(pdf_content))
+        total_pages = len(reader.pages)
+
+        for start_idx in range(0, total_pages, pages_per_chunk):
+            end_idx = min(start_idx + pages_per_chunk, total_pages)
+
+            # Create a new PDF and copy the pages into it
+            writer = PdfWriter()
+            for page_idx in range(start_idx, end_idx):
+                writer.add_page(reader.pages[page_idx])
+
+            # Write out the chunk
+            output = io.BytesIO()
+            writer.write(output)
+            chunk_bytes = output.getvalue()
+
+            # Build the page range string (1-indexed)
+            page_range = f"page-{start_idx + 1:03d}-{end_idx:03d}"
+
+            chunks.append((chunk_bytes, page_range))
+
+    except Exception as e:
+        print(f"[split_pdf_by_pages] 切割 PDF 時發生錯誤: {e}")
+        # If splitting fails, return the whole PDF as a single chunk
+        if pdf_content:
+            chunks.append((pdf_content, "page-001-all"))
+
+    return chunks
+
+
+def calculate_optimal_chunk_size(
+    pdf_content: bytes,
+    target_size_mb: float = 4.0,
+    min_pages: int = 5,
+    max_pages: int = 30,
+) -> int:
+    """
+    Calculate the optimal pages-per-chunk so that each chunk stays below the target size
+
+    Strategy:
+    1. First estimate the average size per page
+    2. Compute the number of pages needed to reach the target size
+    3. Clamp the result between min_pages and max_pages
+
+    Args:
+        pdf_content: Binary content of the PDF file
+        target_size_mb: Target chunk size (MB), default 4MB
+        min_pages: Minimum number of pages, default 5
+        max_pages: Maximum number of pages, default 30
+
+    Returns:
+        int: Recommended number of pages per chunk
+    """
+    total_size_mb = get_pdf_size_mb(pdf_content)
+    total_pages = get_pdf_page_count(pdf_content)
+
+    if total_pages == 0:
+        return min_pages
+
+    # Estimate the average size per page
+    avg_page_size_mb = total_size_mb / total_pages
+
+    # Compute the number of pages needed to reach the target size
+    if avg_page_size_mb > 0:
+        optimal_pages = int(target_size_mb / avg_page_size_mb)
+    else:
+        optimal_pages = max_pages
+
+    # Clamp to the allowed range
+    optimal_pages = max(min_pages, min(optimal_pages, max_pages))
+
+    return optimal_pages
+
+
+def split_pdf_smart(
+    pdf_content: bytes, target_size_mb: float = 4.0
+) -> List[Tuple[bytes, str]]:
+    """
+    Smart PDF splitting
+
+    First computes the optimal pages-per-chunk, then splits the PDF.
+    If a chunk still exceeds the target size after splitting, it is split further.
+
+    Args:
+        pdf_content: Binary content of the PDF file
+        target_size_mb: Target chunk size (MB), default 4MB
+
+    Returns:
+        List[Tuple[bytes, str]]: List of chunks, each element being (chunk content, page range string)
+    """
+    # Compute the optimal pages-per-chunk
+    pages_per_chunk = calculate_optimal_chunk_size(pdf_content, target_size_mb)
+    print(f"[split_pdf_smart] 計算最佳切割頁數: {pages_per_chunk} 頁/切片")
+
+    # Perform the initial split
+    chunks = split_pdf_by_pages(pdf_content, pages_per_chunk)
+
+    # Check whether any chunk exceeds the target size; if so, split it further
+    final_chunks = []
+    for chunk_bytes, page_range in chunks:
+        chunk_size_mb = get_pdf_size_mb(chunk_bytes)
+
+        if chunk_size_mb > target_size_mb and pages_per_chunk > 5:
+            # This chunk is too large and needs to be split further
+            print(
+                f"[split_pdf_smart] 切片 {page_range} 大小 {chunk_size_mb:.2f}MB "
+                f"超過目標 {target_size_mb}MB,進一步分割"
+            )
+
+            # Get the page range of this chunk
+            parts = page_range.replace("page-", "").split("-")
+            start_page = int(parts[0])
+
+            # Re-split with a smaller page count
+            smaller_chunks = split_pdf_by_pages(chunk_bytes, pages_per_chunk // 2)
+
+            # Update the page ranges
+            chunk_page_count = get_pdf_page_count(chunk_bytes)
+            for i, (sub_chunk, _) in enumerate(smaller_chunks):
+                sub_start = start_page + i * (pages_per_chunk // 2)
+                sub_end = min(
+                    sub_start + (pages_per_chunk // 2) - 1,
+                    start_page + chunk_page_count - 1,
+                )
+                sub_range = f"page-{sub_start:03d}-{sub_end:03d}"
+                final_chunks.append((sub_chunk, sub_range))
+        else:
+            final_chunks.append((chunk_bytes, page_range))
+
+    return final_chunks
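For reference, a minimal sketch of splitting a local PDF with these helpers; "large.pdf" and the 30 MB target are illustrative choices, not values fixed by this module:

    from botrun_flow_lang.langgraph_agents.agents.util.pdf_processor import (
        get_pdf_size_mb,
        split_pdf_smart,
    )

    with open("large.pdf", "rb") as f:
        pdf_bytes = f.read()

    for chunk_bytes, page_range in split_pdf_smart(pdf_bytes, target_size_mb=30.0):
        print(page_range, f"{get_pdf_size_mb(chunk_bytes):.2f} MB")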
@@ -15,7 +15,9 @@ from langchain_core.runnables import RunnableConfig
 
 # Import necessary dependencies
 from botrun_flow_lang.models.nodes.utils import scrape_single_url
-from botrun_flow_lang.langgraph_agents.agents.util.pdf_analyzer import
+from botrun_flow_lang.langgraph_agents.agents.util.pdf_analyzer import (
+    analyze_pdf_async,
+)
 from botrun_flow_lang.langgraph_agents.agents.util.img_util import analyze_imgs
 from botrun_flow_lang.langgraph_agents.agents.util.local_files import (
     upload_and_get_tmp_public_url,
@@ -76,6 +78,10 @@ async def chat_with_pdf(
     """
     Analyze a PDF file and answer questions about its content.
 
+    Supports intelligent processing based on file size:
+    - Small files (< 5MB): Direct multimodal analysis
+    - Large files (>= 5MB): Compress -> Split -> Parallel multimodal Q&A -> Merge results
+
     Args:
         pdf_url: The URL to the PDF file (can be generated using generate_tmp_public_url for local files)
         user_input: The user's question or instruction about the PDF content
@@ -91,7 +97,7 @@ async def chat_with_pdf(
     if not pdf_url.startswith("http"):
         pdf_url = upload_and_get_tmp_public_url(pdf_url, botrun_flow_lang_url, user_id)
 
-    return
+    return await analyze_pdf_async(pdf_url, user_input)
 
 
 @mcp.tool()
@@ -175,10 +181,10 @@ async def generate_image(
    # Validate the required parameters
    if not user_id:
        logger.error("User ID not available")
-
+        return "User ID not available"
    if not botrun_flow_lang_url:
        logger.error("botrun_flow_lang_url not available")
-
+        return "botrun_flow_lang_url not available"
 
    # Check rate limit before generating image
    rate_limit_client = RateLimitClient()
@@ -195,10 +201,12 @@ async def generate_image(
            f"User {user_id} has reached daily limit of {daily_limit} image generations. "
            f"Current usage: {current_usage}. Please try again tomorrow."
        )
-
-            f"You have reached your daily limit of {daily_limit} image generations. "
+        return f"[Please tell user error] You have reached your daily limit of {daily_limit} image generations. " \
            f"Current usage: {current_usage}. Please try again tomorrow."
-
+        # raise BotrunRateLimitException(
+        #     f"You have reached your daily limit of {daily_limit} image generations. "
+        #     f"Current usage: {current_usage}. Please try again tomorrow."
+        # )
 
    # 2. Generate the image with DALL-E
    dalle_wrapper = DallEAPIWrapper(
@@ -267,7 +275,8 @@ async def generate_tmp_public_url(
    logger.info(f"generate_tmp_public_url file_path: {file_path}")
 
    if not os.path.exists(file_path):
-
+        return f"File not found: {file_path}"
+        # raise FileNotFoundError(f"File not found: {file_path}")
 
    return upload_and_get_tmp_public_url(file_path, botrun_flow_lang_url, user_id)
 
@@ -1,5 +1,6 @@
 import logging
 from typing import Union, List, Tuple
+from datetime import datetime, timezone
 from google.cloud.exceptions import GoogleCloudError
 from botrun_flow_lang.constants import HATCH_SHARING_STORE_NAME, HATCH_STORE_NAME
 from botrun_flow_lang.services.base.firestore_base import FirestoreBase
@@ -27,6 +28,9 @@ class HatchFsStore(FirestoreBase):
 
     async def set_hatch(self, item: Hatch):
         try:
+            # Update updated_at timestamp with current UTC time
+            item.updated_at = datetime.now(timezone.utc).isoformat()
+
             # Proceed with saving the hatch
             doc_ref = self.collection.document(str(item.id))
             doc_ref.set(item.model_dump())
@@ -49,18 +53,29 @@ class HatchFsStore(FirestoreBase):
             return False
 
     async def get_hatches(
-        self,
+        self,
+        user_id: str,
+        offset: int = 0,
+        limit: int = 20,
+        sort_by: str = "updated_at",
+        order: str = "desc",
     ) -> Tuple[List[Hatch], str]:
         try:
-
-
-
-            )
-            .order_by("name")
-            .offset(offset)
-            .limit(limit)
+            # Build base query
+            query = self.collection.where(
+                filter=firestore.FieldFilter("user_id", "==", user_id)
             )
 
+            # Add sorting
+            # Firestore direction: DESCENDING or ASCENDING
+            direction = (
+                firestore.Query.DESCENDING if order == "desc" else firestore.Query.ASCENDING
+            )
+            query = query.order_by(sort_by, direction=direction)
+
+            # Add pagination
+            query = query.offset(offset).limit(limit)
+
             docs = query.stream()
             hatches = [Hatch(**doc.to_dict()) for doc in docs]
             return hatches, ""
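A small, hedged sketch of calling the updated store method; construction of hatch_store is omitted, and note that filtering on user_id while ordering by updated_at or name generally requires a matching Firestore composite index:

    # Fetch the 20 most recently updated hatches for a user.
    hatches, error = await hatch_store.get_hatches(
        "u123", offset=0, limit=20, sort_by="updated_at", order="desc"
    )
    if error:
        print("query failed:", error)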
@@ -39,7 +39,11 @@ class StorageCsStore(StorageStore):
             {
                 "action": {"type": "Delete"},
                 "condition": {"age": 365, "matchesPrefix": ["tmp/"]},
-            }
+            },
+            {
+                "action": {"type": "Delete"},
+                "condition": {"age": 7, "matchesPrefix": ["pdf-cache/"]},
+            },
         ]
 
         if not bucket.exists():
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: botrun-flow-lang
-Version: 5.11.281
+Version: 5.12.261
 Summary: A flow language for botrun
 Author-email: sebastian-hsu <sebastian.hsu@gmail.com>
 License: MIT
@@ -13,7 +13,7 @@ Requires-Python: <3.13,>=3.11
 Requires-Dist: aiohttp>=3.10.8
 Requires-Dist: anthropic>=0.61.0
 Requires-Dist: boto3>=1.40.3
-Requires-Dist: botrun-hatch>=5.
+Requires-Dist: botrun-hatch>=5.12.261
 Requires-Dist: botrun-log>=0.3.0
 Requires-Dist: cachetools>=5.5.2
 Requires-Dist: chardet>=5.2.0
@@ -47,6 +47,7 @@ Requires-Dist: pandas>=2.2.3
 Requires-Dist: pdfminer-six==20250506
 Requires-Dist: plotly>=6.0.0
 Requires-Dist: pydantic-settings>=2.5.2
+Requires-Dist: pypdf==6.4.2
 Requires-Dist: python-multipart>=0.0.20
 Requires-Dist: pytz>=2024.2
 Requires-Dist: pyyaml>=6.0.2
@@ -8,9 +8,9 @@ botrun_flow_lang/api/auth_api.py,sha256=o_ThrZFcOMQieUcUJIF_B7rsyvbkCvOCjCjknl9G
 botrun_flow_lang/api/auth_utils.py,sha256=qE7RIPDnX30FPmhlgmlQNoVNkLU028x4SldVl6VC4KQ,6455
 botrun_flow_lang/api/botrun_back_api.py,sha256=mE2NSejaYIiE0L9GmNJbLc_FRWCy6BXlcqRwkB1kKmc,2397
 botrun_flow_lang/api/flow_api.py,sha256=DcxuoGE1OcbTgLSYKZ2SO9IdcH3UB5Ik3cVmX3v3-Po,108
-botrun_flow_lang/api/hatch_api.py,sha256=
+botrun_flow_lang/api/hatch_api.py,sha256=trenrAJt95ufjpAzwuTvcCoJdMR2x4EZCuWBup9e4hA,16984
 botrun_flow_lang/api/langgraph_api.py,sha256=zqu0xeTiy2Pr4UL6vvGqVVAy2KX3ZUn1uzcq-Tfb_aM,29291
-botrun_flow_lang/api/line_bot_api.py,sha256=
+botrun_flow_lang/api/line_bot_api.py,sha256=INTWnI09LhRhQ6NcetHrHAQZU86eyoJ2zZ6g-KtX8MM,55500
 botrun_flow_lang/api/model_api.py,sha256=bXemey_XUUdylZwh7Z10eksoBWe9xSa8I9TEL7jIBtE,9483
 botrun_flow_lang/api/rate_limit_api.py,sha256=SkpjfvShHRdP5XJzy3DdrH4jLtdYAEHROGBMBkC9OIY,948
 botrun_flow_lang/api/routes.py,sha256=rd0IoMsteJT9BO3MQuyXirhPQbas6OeiKaEC8Yf2SZs,1570
@@ -23,7 +23,7 @@ botrun_flow_lang/api/version_api.py,sha256=Mcs7hKBP7T7nlHDaZS4U0dtOkNQqW0BtT62Iv
 botrun_flow_lang/api/youtube_api.py,sha256=R384jNRheMKnDyzvlLnbzackipZhiLYTZl4w4hB6vtw,753
 botrun_flow_lang/langgraph_agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 botrun_flow_lang/langgraph_agents/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-botrun_flow_lang/langgraph_agents/agents/agent_runner.py,sha256=
+botrun_flow_lang/langgraph_agents/agents/agent_runner.py,sha256=1BRIbEi7_NoIx1BuVUPFA-fDHErm3nHpX65HsY7YpWU,6495
 botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py,sha256=4fK_hMoUAqcEYv7rrHbAx6PFsJ7UcvGI0G2OgWhVhnw,29972
 botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py,sha256=6fz-ewLQGacEx-uqGfF3-go9FdiioiMzW_sfANzYTcI,31182
 botrun_flow_lang/langgraph_agents/agents/agent_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -44,7 +44,9 @@ botrun_flow_lang/langgraph_agents/agents/util/img_util.py,sha256=FF5XUFCtYBul6DZ
 botrun_flow_lang/langgraph_agents/agents/util/local_files.py,sha256=ib3JVvuTgKhFj-D_8d5MH3vImFe6Fk1aMxyxNqjDWWc,13110
 botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py,sha256=Aw_ufAEBAqSENOtueemYtapxPVvbQ6HScedeWUZ8lS4,2556
 botrun_flow_lang/langgraph_agents/agents/util/model_utils.py,sha256=lCORhM77agNgmDxsA40XlUth-E8ThM5Kv-x0_DQnYrs,4811
-botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py,sha256=
+botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py,sha256=R50ttYF9uun9aawjFzyFlTrXbM03HNbc-yanILdyRtM,15691
+botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py,sha256=ptWaNtu_ls1piiBqQG2ZISjxWJ4gxMbIs7hvBdeas5k,7125
+botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py,sha256=OgaloAzjoMmdymM3O1EiES7Ji6qqZtZPQmDT6HwmnYM,6040
 botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py,sha256=4ynmvT8yXbi4KFEVliXWffozB6fHd8jVueA3MmGKlD8,19073
 botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py,sha256=8xKoj9ABwqCA8dzWFmw-qXCXStgCx6hMc5mVBBJ3ZGw,1913
 botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py,sha256=hhiuxF6u92X8Mz1VLAreFrB-LyR-UOKR9Xxh1TBveTg,6623
@@ -56,7 +58,7 @@ botrun_flow_lang/llm_agent/llm_agent.py,sha256=Ae9YCK0R_zvSU38Oewo7_qbnBMYTaero6
 botrun_flow_lang/llm_agent/llm_agent_util.py,sha256=1slGk7LIYUylRjHvw4d92VAxD16uwiK4Efy0m_7tCJs,3048
 botrun_flow_lang/log/.gitignore,sha256=ZZ3Viy7hKc5dYybO_EM74DBeCqrwL7vmnECmsw--nVc,16
 botrun_flow_lang/mcp_server/__init__.py,sha256=P02XN_G9ALIeNgPwIsLPdKjGO7lk_XYOxME6NFrBrzE,208
-botrun_flow_lang/mcp_server/default_mcp.py,sha256=
+botrun_flow_lang/mcp_server/default_mcp.py,sha256=P38E6p6s9yT2U33hxlOMXG0iioUQ-JgJze-9zmlu5mY,28490
 botrun_flow_lang/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 botrun_flow_lang/models/token_usage.py,sha256=hwGYZzklnTrcDCjNqCn03kddzS3VH-i1l0d5WnF6iqA,842
 botrun_flow_lang/models/nodes/utils.py,sha256=kN9RFOSHPvOZU7T4ZfVU6u2wkKgS03xtAKvjLOpryKM,6699
@@ -64,9 +66,9 @@ botrun_flow_lang/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
 botrun_flow_lang/services/base/firestore_base.py,sha256=Z561TzGvYOUmGKc3IQh03nsK8XHCa96Nlx5m23TySks,1045
 botrun_flow_lang/services/hatch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 botrun_flow_lang/services/hatch/hatch_factory.py,sha256=ObCcb_hTp8YFXRNXOsXtI_dND_dz7cusypF2tDnMqYY,271
-botrun_flow_lang/services/hatch/hatch_fs_store.py,sha256=
+botrun_flow_lang/services/hatch/hatch_fs_store.py,sha256=2wmRlFInjCD1RRRHVfX47xX74vih4zJBhfRcy5Uk7ew,13620
 botrun_flow_lang/services/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-botrun_flow_lang/services/storage/storage_cs_store.py,sha256=
+botrun_flow_lang/services/storage/storage_cs_store.py,sha256=5NQekxfzjH6420QHi6HxLFHM5qY9kfiYis-dmsyuHO4,7896
 botrun_flow_lang/services/storage/storage_factory.py,sha256=Yn40nB79qoEvClksIRnRpQGojXT4J4q1ExBqb3ydets,354
 botrun_flow_lang/services/storage/storage_store.py,sha256=cb31kDJHNqVA4HyiyJJ1Pnyqv1n5nOkHMpPpA8tvgXg,1905
 botrun_flow_lang/services/user_setting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -95,6 +97,6 @@ botrun_flow_lang/utils/yaml_utils.py,sha256=1A6PSEE8TM0HSD_6l-fhUsjYnXJcrEKuPgot
 botrun_flow_lang/utils/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 botrun_flow_lang/utils/clients/rate_limit_client.py,sha256=TRpA56OKrfYsoLoJ-TPYlC7Znp9s267-u6CX6BLyVko,8349
 botrun_flow_lang/utils/clients/token_verify_client.py,sha256=BtrfLvMe-DtS8UKeDhaIkVKDZHphZVP7kyqXn9jhXEc,5740
-botrun_flow_lang-5.
-botrun_flow_lang-5.
-botrun_flow_lang-5.
+botrun_flow_lang-5.12.261.dist-info/METADATA,sha256=vtLwjT6C29pqio3IV9gFQeWSuVexlZuCs_bjkUmCw4s,6221
+botrun_flow_lang-5.12.261.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+botrun_flow_lang-5.12.261.dist-info/RECORD,,
|