botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- botrun_flow_lang/api/auth_api.py +39 -39
- botrun_flow_lang/api/auth_utils.py +183 -183
- botrun_flow_lang/api/botrun_back_api.py +65 -65
- botrun_flow_lang/api/flow_api.py +3 -3
- botrun_flow_lang/api/hatch_api.py +508 -508
- botrun_flow_lang/api/langgraph_api.py +811 -811
- botrun_flow_lang/api/line_bot_api.py +1484 -1484
- botrun_flow_lang/api/model_api.py +300 -300
- botrun_flow_lang/api/rate_limit_api.py +32 -32
- botrun_flow_lang/api/routes.py +79 -79
- botrun_flow_lang/api/search_api.py +53 -53
- botrun_flow_lang/api/storage_api.py +395 -395
- botrun_flow_lang/api/subsidy_api.py +290 -290
- botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
- botrun_flow_lang/api/user_setting_api.py +70 -70
- botrun_flow_lang/api/version_api.py +31 -31
- botrun_flow_lang/api/youtube_api.py +26 -26
- botrun_flow_lang/constants.py +13 -13
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
- botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
- botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
- botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +723 -723
- botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
- botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
- botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
- botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
- botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
- botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
- botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
- botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
- botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +486 -486
- botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
- botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
- botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
- botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
- botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
- botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
- botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
- botrun_flow_lang/llm_agent/llm_agent.py +19 -19
- botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
- botrun_flow_lang/log/.gitignore +2 -2
- botrun_flow_lang/main.py +61 -61
- botrun_flow_lang/main_fast.py +51 -51
- botrun_flow_lang/mcp_server/__init__.py +10 -10
- botrun_flow_lang/mcp_server/default_mcp.py +744 -744
- botrun_flow_lang/models/nodes/utils.py +205 -205
- botrun_flow_lang/models/token_usage.py +34 -34
- botrun_flow_lang/requirements.txt +21 -21
- botrun_flow_lang/services/base/firestore_base.py +30 -30
- botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
- botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
- botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
- botrun_flow_lang/services/storage/storage_factory.py +12 -12
- botrun_flow_lang/services/storage/storage_store.py +65 -65
- botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
- botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
- botrun_flow_lang/static/docs/tools/index.html +926 -926
- botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
- botrun_flow_lang/tests/api_stress_test.py +357 -357
- botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
- botrun_flow_lang/tests/test_botrun_app.py +46 -46
- botrun_flow_lang/tests/test_html_util.py +31 -31
- botrun_flow_lang/tests/test_img_analyzer.py +190 -190
- botrun_flow_lang/tests/test_img_util.py +39 -39
- botrun_flow_lang/tests/test_local_files.py +114 -114
- botrun_flow_lang/tests/test_mermaid_util.py +103 -103
- botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
- botrun_flow_lang/tests/test_plotly_util.py +151 -151
- botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
- botrun_flow_lang/tools/generate_docs.py +133 -133
- botrun_flow_lang/tools/templates/tools.html +153 -153
- botrun_flow_lang/utils/__init__.py +7 -7
- botrun_flow_lang/utils/botrun_logger.py +344 -344
- botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
- botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
- botrun_flow_lang/utils/google_drive_utils.py +654 -654
- botrun_flow_lang/utils/langchain_utils.py +324 -324
- botrun_flow_lang/utils/yaml_utils.py +9 -9
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/METADATA +1 -1
- botrun_flow_lang-5.12.264.dist-info/RECORD +102 -0
- botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/WHEEL +0 -0
|
@@ -1,250 +1,250 @@
|
|
|
1
|
-
"""
|
|
2
|
-
PDF 快取模組
|
|
3
|
-
|
|
4
|
-
提供 PDF 切片的 GCS 快取功能,避免重複切割相同的 PDF 檔案。
|
|
5
|
-
快取會自動在 7 天後過期(透過 GCS Lifecycle Rule)。
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import hashlib
|
|
9
|
-
import json
|
|
10
|
-
from io import BytesIO
|
|
11
|
-
from typing import List, Tuple, Optional
|
|
12
|
-
from datetime import datetime
|
|
13
|
-
|
|
14
|
-
from botrun_flow_lang.services.storage.storage_factory import storage_store_factory
|
|
15
|
-
|
|
16
|
-
# 快取目錄前綴
|
|
17
|
-
PDF_CACHE_PREFIX = "pdf-cache"
|
|
18
|
-
|
|
19
|
-
# 快取過期天數(用於 lifecycle rule)
|
|
20
|
-
PDF_CACHE_EXPIRY_DAYS = 7
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def get_cache_key(pdf_url: str) -> str:
|
|
24
|
-
"""
|
|
25
|
-
根據 PDF URL 產生快取 key(hash)
|
|
26
|
-
|
|
27
|
-
Args:
|
|
28
|
-
pdf_url: PDF 檔案的 URL
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
str: 32 字元的 MD5 hash
|
|
32
|
-
"""
|
|
33
|
-
return hashlib.md5(pdf_url.encode()).hexdigest()
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def _get_cache_path(cache_key: str) -> str:
|
|
37
|
-
"""
|
|
38
|
-
取得快取目錄路徑
|
|
39
|
-
|
|
40
|
-
Args:
|
|
41
|
-
cache_key: 快取 key
|
|
42
|
-
|
|
43
|
-
Returns:
|
|
44
|
-
str: GCS 路徑,格式為 "pdf-cache/{cache_key}"
|
|
45
|
-
"""
|
|
46
|
-
return f"{PDF_CACHE_PREFIX}/{cache_key}"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def _get_metadata_path(cache_key: str) -> str:
|
|
50
|
-
"""取得 metadata 檔案路徑"""
|
|
51
|
-
return f"{_get_cache_path(cache_key)}/metadata.json"
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _get_chunk_path(cache_key: str, chunk_index: int) -> str:
|
|
55
|
-
"""取得切片檔案路徑"""
|
|
56
|
-
return f"{_get_cache_path(cache_key)}/chunk-{chunk_index:03d}.pdf"
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
async def check_cache(cache_key: str) -> Optional[List[Tuple[bytes, str]]]:
    """Look up a cached set of PDF chunks in GCS.

    Args:
        cache_key: Cache key produced by get_cache_key().

    Returns:
        Optional[List[Tuple[bytes, str]]]: List of (chunk_bytes, page_range)
        tuples when a complete cache entry exists, otherwise None.
    """
    try:
        store = storage_store_factory()
        meta_path = _get_metadata_path(cache_key)

        # No metadata file means there is no cache entry at all.
        if not await store.file_exists(meta_path):
            print(f"[pdf_cache] 快取不存在: {cache_key}")
            return None

        meta_buf = await store.retrieve_file(meta_path)
        if not meta_buf:
            print(f"[pdf_cache] 無法讀取 metadata: {cache_key}")
            return None

        meta = json.loads(meta_buf.getvalue().decode("utf-8"))
        chunk_count = meta.get("chunk_count", 0)
        page_ranges = meta.get("page_ranges", [])

        if chunk_count == 0:
            print(f"[pdf_cache] 快取無切片: {cache_key}")
            return None

        print(f"[pdf_cache] 找到快取: {cache_key}, {chunk_count} 個切片")

        # Load every chunk; a single missing chunk invalidates the whole entry.
        result: List[Tuple[bytes, str]] = []
        for idx in range(chunk_count):
            chunk_buf = await store.retrieve_file(_get_chunk_path(cache_key, idx))

            if not chunk_buf:
                print(f"[pdf_cache] 無法讀取切片 {idx}: {cache_key}")
                return None  # incomplete cache entry — do not use it

            if idx < len(page_ranges):
                page_range = page_ranges[idx]
            else:
                page_range = f"chunk-{idx:03d}"
            result.append((chunk_buf.getvalue(), page_range))

        print(f"[pdf_cache] 成功載入快取: {cache_key}")
        return result

    except Exception as e:
        print(f"[pdf_cache] 檢查快取時發生錯誤: {e}")
        return None
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
async def save_to_cache(
    cache_key: str,
    chunks: List[Tuple[bytes, str]],
    original_url: str,
    original_size_mb: float,
    total_pages: int,
) -> bool:
    """Store PDF chunks and their metadata in the GCS cache.

    Args:
        cache_key: Cache key produced by get_cache_key().
        chunks: List of (chunk_bytes, page_range) tuples to store.
        original_url: URL of the original PDF.
        original_size_mb: Size of the original file in MB.
        total_pages: Total page count of the original PDF.

    Returns:
        bool: True when every chunk and the metadata were stored successfully.
    """
    # Local import: the module header only imports `datetime` itself.
    from datetime import timezone

    try:
        storage = storage_store_factory()

        # 1. Store every chunk first; abort on the first failure so we never
        #    write a metadata file that points at missing chunks.
        page_ranges = []
        for i, (chunk_bytes, page_range) in enumerate(chunks):
            chunk_path = _get_chunk_path(cache_key, i)
            chunk_file = BytesIO(chunk_bytes)

            success, _ = await storage.store_file(
                chunk_path, chunk_file, public=False, content_type="application/pdf"
            )

            if not success:
                print(f"[pdf_cache] 無法存入切片 {i}: {cache_key}")
                return False

            page_ranges.append(page_range)

        # 2. Store the metadata last, so its presence implies a complete entry.
        metadata = {
            "original_url": original_url,
            "cache_key": cache_key,
            "chunk_count": len(chunks),
            "page_ranges": page_ranges,
            "original_size_mb": original_size_mb,
            "total_pages": total_pages,
            # Fix: datetime.utcnow() is deprecated (Python 3.12) and returns a
            # naive datetime. Use an explicit UTC-aware timestamp; the ISO
            # string now carries a "+00:00" offset, which remains valid ISO 8601.
            "created_at": datetime.now(timezone.utc).isoformat(),
        }

        metadata_path = _get_metadata_path(cache_key)
        metadata_file = BytesIO(json.dumps(metadata, ensure_ascii=False).encode("utf-8"))

        success, _ = await storage.store_file(
            metadata_path, metadata_file, public=False, content_type="application/json"
        )

        if not success:
            print(f"[pdf_cache] 無法存入 metadata: {cache_key}")
            return False

        print(
            f"[pdf_cache] 成功存入快取: {cache_key}, "
            f"{len(chunks)} 個切片, {total_pages} 頁"
        )
        return True

    except Exception as e:
        print(f"[pdf_cache] 存入快取時發生錯誤: {e}")
        return False
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
async def get_cache_metadata(cache_key: str) -> Optional[dict]:
    """Fetch only the metadata of a cache entry, without loading any chunks.

    Args:
        cache_key: Cache key produced by get_cache_key().

    Returns:
        Optional[dict]: Parsed metadata dictionary, or None when the entry
        is absent or unreadable.
    """
    try:
        store = storage_store_factory()
        meta_path = _get_metadata_path(cache_key)

        if not await store.file_exists(meta_path):
            return None

        meta_buf = await store.retrieve_file(meta_path)
        if not meta_buf:
            return None

        raw = meta_buf.getvalue().decode("utf-8")
        return json.loads(raw)

    except Exception as e:
        print(f"[pdf_cache] 讀取 metadata 時發生錯誤: {e}")
        return None
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
async def delete_cache(cache_key: str) -> bool:
    """Remove a cache entry (all chunks plus the metadata file) from GCS.

    Args:
        cache_key: Cache key produced by get_cache_key().

    Returns:
        bool: True when the entry was deleted or did not exist.
    """
    try:
        store = storage_store_factory()

        # The metadata tells us how many chunk files there are to remove.
        meta = await get_cache_metadata(cache_key)
        if not meta:
            return True  # nothing cached — treat as success

        # Delete every chunk, then the metadata file last.
        for idx in range(meta.get("chunk_count", 0)):
            await store.delete_file(_get_chunk_path(cache_key, idx))

        await store.delete_file(_get_metadata_path(cache_key))

        print(f"[pdf_cache] 已刪除快取: {cache_key}")
        return True

    except Exception as e:
        print(f"[pdf_cache] 刪除快取時發生錯誤: {e}")
        return False
|
|
1
|
+
"""
|
|
2
|
+
PDF 快取模組
|
|
3
|
+
|
|
4
|
+
提供 PDF 切片的 GCS 快取功能,避免重複切割相同的 PDF 檔案。
|
|
5
|
+
快取會自動在 7 天後過期(透過 GCS Lifecycle Rule)。
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import json
|
|
10
|
+
from io import BytesIO
|
|
11
|
+
from typing import List, Tuple, Optional
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
from botrun_flow_lang.services.storage.storage_factory import storage_store_factory
|
|
15
|
+
|
|
16
|
+
# 快取目錄前綴
|
|
17
|
+
PDF_CACHE_PREFIX = "pdf-cache"
|
|
18
|
+
|
|
19
|
+
# 快取過期天數(用於 lifecycle rule)
|
|
20
|
+
PDF_CACHE_EXPIRY_DAYS = 7
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_cache_key(pdf_url: str) -> str:
|
|
24
|
+
"""
|
|
25
|
+
根據 PDF URL 產生快取 key(hash)
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
pdf_url: PDF 檔案的 URL
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
str: 32 字元的 MD5 hash
|
|
32
|
+
"""
|
|
33
|
+
return hashlib.md5(pdf_url.encode()).hexdigest()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _get_cache_path(cache_key: str) -> str:
|
|
37
|
+
"""
|
|
38
|
+
取得快取目錄路徑
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
cache_key: 快取 key
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
str: GCS 路徑,格式為 "pdf-cache/{cache_key}"
|
|
45
|
+
"""
|
|
46
|
+
return f"{PDF_CACHE_PREFIX}/{cache_key}"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _get_metadata_path(cache_key: str) -> str:
|
|
50
|
+
"""取得 metadata 檔案路徑"""
|
|
51
|
+
return f"{_get_cache_path(cache_key)}/metadata.json"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _get_chunk_path(cache_key: str, chunk_index: int) -> str:
|
|
55
|
+
"""取得切片檔案路徑"""
|
|
56
|
+
return f"{_get_cache_path(cache_key)}/chunk-{chunk_index:03d}.pdf"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
async def check_cache(cache_key: str) -> Optional[List[Tuple[bytes, str]]]:
    """Look up a cached set of PDF chunks in GCS.

    Args:
        cache_key: Cache key produced by get_cache_key().

    Returns:
        Optional[List[Tuple[bytes, str]]]: List of (chunk_bytes, page_range)
        tuples when a complete cache entry exists, otherwise None.
    """
    try:
        store = storage_store_factory()
        meta_path = _get_metadata_path(cache_key)

        # No metadata file means there is no cache entry at all.
        if not await store.file_exists(meta_path):
            print(f"[pdf_cache] 快取不存在: {cache_key}")
            return None

        meta_buf = await store.retrieve_file(meta_path)
        if not meta_buf:
            print(f"[pdf_cache] 無法讀取 metadata: {cache_key}")
            return None

        meta = json.loads(meta_buf.getvalue().decode("utf-8"))
        chunk_count = meta.get("chunk_count", 0)
        page_ranges = meta.get("page_ranges", [])

        if chunk_count == 0:
            print(f"[pdf_cache] 快取無切片: {cache_key}")
            return None

        print(f"[pdf_cache] 找到快取: {cache_key}, {chunk_count} 個切片")

        # Load every chunk; a single missing chunk invalidates the whole entry.
        result: List[Tuple[bytes, str]] = []
        for idx in range(chunk_count):
            chunk_buf = await store.retrieve_file(_get_chunk_path(cache_key, idx))

            if not chunk_buf:
                print(f"[pdf_cache] 無法讀取切片 {idx}: {cache_key}")
                return None  # incomplete cache entry — do not use it

            if idx < len(page_ranges):
                page_range = page_ranges[idx]
            else:
                page_range = f"chunk-{idx:03d}"
            result.append((chunk_buf.getvalue(), page_range))

        print(f"[pdf_cache] 成功載入快取: {cache_key}")
        return result

    except Exception as e:
        print(f"[pdf_cache] 檢查快取時發生錯誤: {e}")
        return None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
async def save_to_cache(
    cache_key: str,
    chunks: List[Tuple[bytes, str]],
    original_url: str,
    original_size_mb: float,
    total_pages: int,
) -> bool:
    """Store PDF chunks and their metadata in the GCS cache.

    Args:
        cache_key: Cache key produced by get_cache_key().
        chunks: List of (chunk_bytes, page_range) tuples to store.
        original_url: URL of the original PDF.
        original_size_mb: Size of the original file in MB.
        total_pages: Total page count of the original PDF.

    Returns:
        bool: True when every chunk and the metadata were stored successfully.
    """
    # Local import: the module header only imports `datetime` itself.
    from datetime import timezone

    try:
        storage = storage_store_factory()

        # 1. Store every chunk first; abort on the first failure so we never
        #    write a metadata file that points at missing chunks.
        page_ranges = []
        for i, (chunk_bytes, page_range) in enumerate(chunks):
            chunk_path = _get_chunk_path(cache_key, i)
            chunk_file = BytesIO(chunk_bytes)

            success, _ = await storage.store_file(
                chunk_path, chunk_file, public=False, content_type="application/pdf"
            )

            if not success:
                print(f"[pdf_cache] 無法存入切片 {i}: {cache_key}")
                return False

            page_ranges.append(page_range)

        # 2. Store the metadata last, so its presence implies a complete entry.
        metadata = {
            "original_url": original_url,
            "cache_key": cache_key,
            "chunk_count": len(chunks),
            "page_ranges": page_ranges,
            "original_size_mb": original_size_mb,
            "total_pages": total_pages,
            # Fix: datetime.utcnow() is deprecated (Python 3.12) and returns a
            # naive datetime. Use an explicit UTC-aware timestamp; the ISO
            # string now carries a "+00:00" offset, which remains valid ISO 8601.
            "created_at": datetime.now(timezone.utc).isoformat(),
        }

        metadata_path = _get_metadata_path(cache_key)
        metadata_file = BytesIO(json.dumps(metadata, ensure_ascii=False).encode("utf-8"))

        success, _ = await storage.store_file(
            metadata_path, metadata_file, public=False, content_type="application/json"
        )

        if not success:
            print(f"[pdf_cache] 無法存入 metadata: {cache_key}")
            return False

        print(
            f"[pdf_cache] 成功存入快取: {cache_key}, "
            f"{len(chunks)} 個切片, {total_pages} 頁"
        )
        return True

    except Exception as e:
        print(f"[pdf_cache] 存入快取時發生錯誤: {e}")
        return False
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
async def get_cache_metadata(cache_key: str) -> Optional[dict]:
    """Fetch only the metadata of a cache entry, without loading any chunks.

    Args:
        cache_key: Cache key produced by get_cache_key().

    Returns:
        Optional[dict]: Parsed metadata dictionary, or None when the entry
        is absent or unreadable.
    """
    try:
        store = storage_store_factory()
        meta_path = _get_metadata_path(cache_key)

        if not await store.file_exists(meta_path):
            return None

        meta_buf = await store.retrieve_file(meta_path)
        if not meta_buf:
            return None

        raw = meta_buf.getvalue().decode("utf-8")
        return json.loads(raw)

    except Exception as e:
        print(f"[pdf_cache] 讀取 metadata 時發生錯誤: {e}")
        return None
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
async def delete_cache(cache_key: str) -> bool:
    """Remove a cache entry (all chunks plus the metadata file) from GCS.

    Args:
        cache_key: Cache key produced by get_cache_key().

    Returns:
        bool: True when the entry was deleted or did not exist.
    """
    try:
        store = storage_store_factory()

        # The metadata tells us how many chunk files there are to remove.
        meta = await get_cache_metadata(cache_key)
        if not meta:
            return True  # nothing cached — treat as success

        # Delete every chunk, then the metadata file last.
        for idx in range(meta.get("chunk_count", 0)):
            await store.delete_file(_get_chunk_path(cache_key, idx))

        await store.delete_file(_get_metadata_path(cache_key))

        print(f"[pdf_cache] 已刪除快取: {cache_key}")
        return True

    except Exception as e:
        print(f"[pdf_cache] 刪除快取時發生錯誤: {e}")
        return False
|