botrun-flow-lang 5.12.263__py3-none-any.whl → 5.12.264__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- botrun_flow_lang/api/auth_api.py +39 -39
- botrun_flow_lang/api/auth_utils.py +183 -183
- botrun_flow_lang/api/botrun_back_api.py +65 -65
- botrun_flow_lang/api/flow_api.py +3 -3
- botrun_flow_lang/api/hatch_api.py +508 -508
- botrun_flow_lang/api/langgraph_api.py +811 -811
- botrun_flow_lang/api/line_bot_api.py +1484 -1484
- botrun_flow_lang/api/model_api.py +300 -300
- botrun_flow_lang/api/rate_limit_api.py +32 -32
- botrun_flow_lang/api/routes.py +79 -79
- botrun_flow_lang/api/search_api.py +53 -53
- botrun_flow_lang/api/storage_api.py +395 -395
- botrun_flow_lang/api/subsidy_api.py +290 -290
- botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
- botrun_flow_lang/api/user_setting_api.py +70 -70
- botrun_flow_lang/api/version_api.py +31 -31
- botrun_flow_lang/api/youtube_api.py +26 -26
- botrun_flow_lang/constants.py +13 -13
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py +178 -178
- botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
- botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gemini_subsidy_graph.py +460 -460
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
- botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +723 -723
- botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
- botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
- botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
- botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
- botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
- botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
- botrun_flow_lang/langgraph_agents/agents/util/local_files.py +419 -419
- botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
- botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +486 -486
- botrun_flow_lang/langgraph_agents/agents/util/pdf_cache.py +250 -250
- botrun_flow_lang/langgraph_agents/agents/util/pdf_processor.py +204 -204
- botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
- botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
- botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
- botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
- botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
- botrun_flow_lang/llm_agent/llm_agent.py +19 -19
- botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
- botrun_flow_lang/log/.gitignore +2 -2
- botrun_flow_lang/main.py +61 -61
- botrun_flow_lang/main_fast.py +51 -51
- botrun_flow_lang/mcp_server/__init__.py +10 -10
- botrun_flow_lang/mcp_server/default_mcp.py +744 -744
- botrun_flow_lang/models/nodes/utils.py +205 -205
- botrun_flow_lang/models/token_usage.py +34 -34
- botrun_flow_lang/requirements.txt +21 -21
- botrun_flow_lang/services/base/firestore_base.py +30 -30
- botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
- botrun_flow_lang/services/hatch/hatch_fs_store.py +419 -419
- botrun_flow_lang/services/storage/storage_cs_store.py +206 -206
- botrun_flow_lang/services/storage/storage_factory.py +12 -12
- botrun_flow_lang/services/storage/storage_store.py +65 -65
- botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
- botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
- botrun_flow_lang/static/docs/tools/index.html +926 -926
- botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
- botrun_flow_lang/tests/api_stress_test.py +357 -357
- botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
- botrun_flow_lang/tests/test_botrun_app.py +46 -46
- botrun_flow_lang/tests/test_html_util.py +31 -31
- botrun_flow_lang/tests/test_img_analyzer.py +190 -190
- botrun_flow_lang/tests/test_img_util.py +39 -39
- botrun_flow_lang/tests/test_local_files.py +114 -114
- botrun_flow_lang/tests/test_mermaid_util.py +103 -103
- botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
- botrun_flow_lang/tests/test_plotly_util.py +151 -151
- botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
- botrun_flow_lang/tools/generate_docs.py +133 -133
- botrun_flow_lang/tools/templates/tools.html +153 -153
- botrun_flow_lang/utils/__init__.py +7 -7
- botrun_flow_lang/utils/botrun_logger.py +344 -344
- botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
- botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
- botrun_flow_lang/utils/google_drive_utils.py +654 -654
- botrun_flow_lang/utils/langchain_utils.py +324 -324
- botrun_flow_lang/utils/yaml_utils.py +9 -9
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/METADATA +1 -1
- botrun_flow_lang-5.12.264.dist-info/RECORD +102 -0
- botrun_flow_lang-5.12.263.dist-info/RECORD +0 -102
- {botrun_flow_lang-5.12.263.dist-info → botrun_flow_lang-5.12.264.dist-info}/WHEEL +0 -0
|
@@ -1,654 +1,654 @@
|
|
|
1
|
-
import io
|
|
2
|
-
import os
|
|
3
|
-
import re
|
|
4
|
-
import asyncio
|
|
5
|
-
import logging
|
|
6
|
-
import requests
|
|
7
|
-
import uuid
|
|
8
|
-
from typing import Optional, Tuple
|
|
9
|
-
|
|
10
|
-
import chardet
|
|
11
|
-
from dotenv import load_dotenv
|
|
12
|
-
from google.oauth2 import service_account
|
|
13
|
-
from google.auth.credentials import Credentials
|
|
14
|
-
from googleapiclient.discovery import build
|
|
15
|
-
from googleapiclient.errors import HttpError
|
|
16
|
-
from googleapiclient.http import MediaIoBaseDownload
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
load_dotenv()
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def authenticate_google_services(service_account_file: str):
    """Build Drive v3 and Docs v1 API clients from a service-account key file.

    Args:
        service_account_file: Path to the service-account key file.

    Returns:
        A ``(drive_service, docs_service)`` tuple of API client objects.
    """
    # A single credential object, scoped for Drive, Docs and Sheets, is shared
    # by both clients.
    requested_scopes = [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/documents",
        "https://www.googleapis.com/auth/spreadsheets",
    ]
    creds = service_account.Credentials.from_service_account_file(
        service_account_file, scopes=requested_scopes
    )
    drive_client = build("drive", "v3", credentials=creds)
    docs_client = build("docs", "v1", credentials=creds)
    return drive_client, docs_client
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# def service_account_authentication(service_name, version, scopes):
|
|
37
|
-
# service_account_file: str = os.getenv(
|
|
38
|
-
# "GOOGLE_APPLICATION_CREDENTIALS", "./keys/google_service_account_key.json"
|
|
39
|
-
# )
|
|
40
|
-
# credentials: Credentials = service_account.Credentials.from_service_account_file(
|
|
41
|
-
# service_account_file, scopes=scopes
|
|
42
|
-
# )
|
|
43
|
-
# return build(service_name, version, credentials=credentials)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def get_google_doc_content_with_service(
    file_id: str, mime_type, service, with_decode=True
):
    """Download a Drive file and return its content.

    Native Google Docs are exported as plain text; every other MIME type is
    downloaded as stored.

    Args:
        file_id: ID of the Drive file.
        mime_type: MIME type of the file; only
            ``application/vnd.google-apps.document`` is treated specially.
        service: Drive API client exposing ``files()``.
        with_decode: When true, decode the downloaded bytes to ``str``; when
            false, return the downloaded bytes unchanged.

    Returns:
        ``str`` with a leading BOM removed and CRLF normalised to LF when
        ``with_decode`` is true, otherwise the raw ``bytes``.
    """
    files = service.files()
    if mime_type == "application/vnd.google-apps.document":
        request = files.export_media(fileId=file_id, mimeType="text/plain")
    else:
        # Every non-native type is fetched byte-for-byte.
        request = files.get_media(fileId=file_id)

    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    raw_content = buffer.getvalue()

    if not with_decode:
        # The previous code called bytes.replace() with str arguments here,
        # which always raised TypeError. The payload is returned untouched
        # because rewriting CRLF inside undecoded data could corrupt binary
        # files such as .docx.
        return raw_content

    # chardet reports encoding=None when it cannot decide (e.g. empty input);
    # fall back to UTF-8 instead of passing None to decode().
    encoding = chardet.detect(raw_content)["encoding"] or "utf-8"
    content = raw_content.decode(encoding)
    if content.startswith("\ufeff"):
        content = content[1:]
    return content.replace("\r\n", "\n")
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def get_google_doc_mime_type(file_id: str, drive_service) -> str:
    """Return the MIME type of a Google Drive file.

    Args:
        file_id: ID of the Drive file.
        drive_service: Drive API client exposing ``files().get(...)``.

    Returns:
        The file's MIME type, e.g. ``'application/vnd.google-apps.document'``,
        or an empty string when the metadata carries no ``mimeType`` field.

    Raises:
        HttpError: If the file metadata cannot be retrieved.
    """
    try:
        # Ask for the mimeType field only, not the full metadata record.
        file_metadata = (
            drive_service.files().get(fileId=file_id, fields="mimeType").execute()
        )
    except HttpError as error:
        # Report through logging, like the rest of this module, then let the
        # caller handle the failure.
        logging.error(f"An error occurred: {error}")
        raise
    return file_metadata.get("mimeType", "")
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def get_sheets_service(service_account_file: str):
    """Build a Google Sheets v4 API client from a service-account key file.

    Args:
        service_account_file: Path to the service-account key file.

    Returns:
        The client object returned by ``build("sheets", "v4", ...)``.

    Raises:
        Exception: Any error raised while loading the credentials or building
            the client is logged and re-raised unchanged.
    """
    # build, service_account and logging are already imported at module level,
    # so no function-scope imports are needed.
    try:
        credentials = service_account.Credentials.from_service_account_file(
            service_account_file,
            scopes=["https://www.googleapis.com/auth/spreadsheets"],
        )
        return build("sheets", "v4", credentials=credentials)
    except Exception as e:
        logging.error(f"[Google Sheets] 建立服務物件失敗: {e}")
        raise
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def create_sheet_if_not_exists(
    service_account_file: str,
    spreadsheet_id: str,
    sheet_name: str,
    headers: Optional[list] = None,
):
    """Ensure a worksheet exists, creating it (with optional headers) if needed.

    Args:
        service_account_file: Path to the service-account key file.
        spreadsheet_id: ID of the Google Sheets spreadsheet.
        sheet_name: Title of the worksheet.
        headers: Optional header row written to row 1 of a newly created
            worksheet. Ignored when the worksheet already exists.

    Returns:
        bool: True when the worksheet already existed or was created; False
        when any step failed (the error is logged, not raised).
    """
    try:
        sheet = get_sheets_service(service_account_file).spreadsheets()

        # Collect the titles of the worksheets that already exist.
        spreadsheet = sheet.get(spreadsheetId=spreadsheet_id).execute()
        existing_titles = {ws["properties"]["title"] for ws in spreadsheet["sheets"]}
        if sheet_name in existing_titles:
            return True

        # Named so it does not shadow the module-level ``requests`` import.
        add_sheet_requests = [{"addSheet": {"properties": {"title": sheet_name}}}]
        sheet.batchUpdate(
            spreadsheetId=spreadsheet_id, body={"requests": add_sheet_requests}
        ).execute()

        if headers:
            sheet.values().update(
                spreadsheetId=spreadsheet_id,
                range=f"{sheet_name}!1:1",
                valueInputOption="RAW",
                body={"values": [headers]},
            ).execute()
        return True

    except Exception as e:
        # Best-effort contract: report the failure through the return value.
        logging.error(f"[Google Sheets] 建立工作表失敗: {e}")
        return False
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
def get_sheet_content(service_account_file: str, spreadsheet_id: str, sheet_name: str):
    """Read a worksheet and return its data grouped by column.

    The first returned row is used as the header. Each header value becomes a
    key whose list holds that column's cell from every following row, with
    ``None`` where a row is shorter than the header.

    Args:
        service_account_file: Path to the service-account key file.
        spreadsheet_id: ID of the Google Sheets spreadsheet.
        sheet_name: Title of the worksheet; used as the range to read.

    Returns:
        dict: Header value -> list of column values; ``{}`` when the
        worksheet returns no values.

    Raises:
        Exception: If reading fails; the original error is chained as the
            cause.
    """
    try:
        service = get_sheets_service(service_account_file)
        range_name = f"{sheet_name}"
        result = (
            service.spreadsheets()
            .values()
            .get(spreadsheetId=spreadsheet_id, range=range_name)
            .execute()
        )
        values = result.get("values", [])
        if not values:
            return {}

        header, *rows = values
        columns = {col: [] for col in header}
        for row in rows:
            for idx, col in enumerate(header):
                # Rows may be shorter than the header; pad with None.
                columns[col].append(row[idx] if idx < len(row) else None)
        return columns
    except Exception as e:
        # Same exception type and message as before, but keep the original
        # traceback attached as the cause.
        raise Exception(f"讀取 Google Sheet 內容失敗: {e}") from e
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
def append_data_to_gsheet(
    service_account_file: str,
    spreadsheet_id: str,
    sheet_name: str,
    data_dict: dict,
    sort_order: str = "new_to_old",
):
    """Insert one row into a worksheet, newest-first or oldest-first.

    The row is laid out according to the worksheet's header row (row 1):
    each header is looked up in ``data_dict`` and missing keys become ``""``.

    Args:
        service_account_file: Path to the service-account key file.
        spreadsheet_id: ID of the Google Sheets spreadsheet.
        sheet_name: Title of the worksheet.
        data_dict: Row data keyed by header name.
        sort_order: ``"new_to_old"`` (default) inserts the row at row 2;
            ``"old_to_new"`` appends it after the existing data.

    Returns:
        dict: The API response of the final update/append call.

    Raises:
        ValueError: If ``sort_order`` is not one of the two supported values.
        Exception: If the worksheet has no header row, cannot be found, or an
            API call fails. Every failure is logged before being re-raised.
    """
    try:
        # Reject an unsupported sort order before making any API call.
        if sort_order not in ("new_to_old", "old_to_new"):
            raise ValueError(
                f"不支援的排序方式: {sort_order},請使用 'new_to_old' 或 'old_to_new'"
            )

        service = get_sheets_service(service_account_file)
        sheet = service.spreadsheets()

        # Read the header row.
        header_range = f"{sheet_name}!1:1"
        result = (
            sheet.values()
            .get(spreadsheetId=spreadsheet_id, range=header_range)
            .execute()
        )

        # A missing or empty "values" entry both mean there is no header row.
        header_rows = result.get("values") or [[]]
        headers = header_rows[0]
        if not headers:
            raise Exception(f"工作表 {sheet_name} 沒有標題列")

        # Order the row's cells to match the header row.
        row_data = [data_dict.get(header, "") for header in headers]

        if sort_order == "new_to_old":
            # insertDimension needs the numeric sheetId, not the title.
            spreadsheet = sheet.get(spreadsheetId=spreadsheet_id).execute()
            sheet_id = next(
                (
                    ws["properties"]["sheetId"]
                    for ws in spreadsheet["sheets"]
                    if ws["properties"]["title"] == sheet_name
                ),
                None,
            )
            if sheet_id is None:
                raise Exception(f"找不到工作表: {sheet_name}")

            # Insert an empty row at row 2, directly below the header.
            insert_request = {
                "insertDimension": {
                    "range": {
                        "sheetId": sheet_id,
                        "dimension": "ROWS",
                        "startIndex": 1,  # row 2 (0-based index)
                        "endIndex": 2,
                    },
                    "inheritFromBefore": False,
                }
            }
            sheet.batchUpdate(
                spreadsheetId=spreadsheet_id, body={"requests": [insert_request]}
            ).execute()

            # Then write the data into the new row 2.
            range_name = f"{sheet_name}!2:2"
            result = (
                sheet.values()
                .update(
                    spreadsheetId=spreadsheet_id,
                    range=range_name,
                    valueInputOption="RAW",
                    body={"values": [row_data]},
                )
                .execute()
            )
        else:
            # "old_to_new": append after the existing data; the A:A range
            # lets the append call locate the end of the data.
            range_name = f"{sheet_name}!A:A"
            result = (
                sheet.values()
                .append(
                    spreadsheetId=spreadsheet_id,
                    range=range_name,
                    valueInputOption="RAW",
                    insertDataOption="INSERT_ROWS",
                    body={"values": [row_data]},
                )
                .execute()
            )

        return result

    except Exception as e:
        logging.error(f"[Google Sheets] 插入資料失敗 (排序方式: {sort_order}): {e}")
        raise
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def extract_google_doc_id_from_link(google_doc_link: str) -> Optional[str]:
    """Extract a Google Doc ID from a URL or from a bare ID string.

    Recognised forms, tried in order: ``/document/d/<id>``, an ``id=<id>``
    query parameter, ``/file/d/<id>``, and finally a string that consists
    only of ID characters (letters, digits, ``-``, ``_``).

    Args:
        google_doc_link: Google Doc URL in one of the forms above, or an ID.

    Returns:
        The document ID if one is found, otherwise ``None``.
    """
    if not google_doc_link:
        return None

    patterns = (
        r"/document/d/([a-zA-Z0-9_-]+)",
        # Only a real ``id`` parameter counts. Without the boundary, any
        # parameter that merely ends in "id" (``gid=0``, ``uid=...``) would be
        # returned as the document ID.
        r"(?:^|[?&])id=([a-zA-Z0-9_-]+)",
        r"/file/d/([a-zA-Z0-9_-]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, google_doc_link)
        if match:
            return match.group(1)

    # The input may already be just an ID.
    candidate = google_doc_link.strip()
    if re.match(r"^[a-zA-Z0-9_-]+$", candidate):
        return candidate

    return None
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
async def fetch_google_doc_content(doc_link: str) -> Optional[str]:
    """Fetch a Google Doc's content using service-account credentials.

    The key file path is read from the
    ``GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC`` environment variable.
    The blocking Google API work runs in the default executor.

    Args:
        doc_link: Google Doc link or ID.

    Returns:
        The document content as plain text, or ``None`` on any failure
        (missing configuration, unparseable link, API error); failures are
        logged rather than raised.
    """
    try:
        # Check the required configuration first.
        credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC")
        if not credentials_path:
            logging.error(
                "GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC environment variable not set"
            )
            return None

        if not os.path.exists(credentials_path):
            logging.error(
                f"Google service account credentials file not found: {credentials_path}"
            )
            return None

        doc_id = extract_google_doc_id_from_link(doc_link)
        if not doc_id:
            logging.error(f"Unable to extract Google Doc ID from link: {doc_link}")
            return None

        # Inside a coroutine the running loop is the one to use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, fetch_google_doc_content_sync, credentials_path, doc_id
        )

    except Exception as e:
        logging.error(f"Error fetching Google Doc content from {doc_link}: {e}")
        return None
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
def fetch_google_doc_content_sync(credentials_path: str, doc_id: str) -> Optional[str]:
    """Blocking helper that downloads a document's text through the Drive client.

    Args:
        credentials_path: Path to the service-account key file.
        doc_id: ID of the document to fetch.

    Returns:
        The stripped document text, or ``None`` when the result is empty, is
        not a string, or any step raises (the error is logged).
    """
    try:
        drive_client, _docs_client = authenticate_google_services(credentials_path)
        doc_mime = get_google_doc_mime_type(doc_id, drive_client)
        fetched = get_google_doc_content_with_service(
            doc_id, doc_mime, drive_client, with_decode=True
        )

        # Anything that is not a non-empty string counts as "no text".
        text = fetched.strip() if fetched and isinstance(fetched, str) else ""
        if not text:
            logging.warning(f"Google Doc {doc_id} appears to be empty")
            return None

        logging.info(f"Successfully fetched Google Doc content (length: {len(text)})")
        return text
    except Exception as err:
        logging.error(f"Error in sync Google Doc fetch for {doc_id}: {err}")
        return None
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
async def get_webhook_base_url() -> Optional[str]:
    """Get the webhook base URL from the botrun info API.

    Sends ``GET {BOTRUN_BACK_API_BASE}/botrun/info`` in the default executor
    and reads ``botrun_flow_lang_url`` from the JSON response.

    Returns:
        The ``botrun_flow_lang_url`` value, or ``None`` when the environment
        variable is unset, the request fails or times out, the status is not
        200, or the field is missing. Failures are logged, not raised.
    """
    try:
        botrun_back_api_base = os.getenv("BOTRUN_BACK_API_BASE")
        if not botrun_back_api_base:
            logging.error("BOTRUN_BACK_API_BASE environment variable not set")
            return None

        info_url = f"{botrun_back_api_base}/botrun/info"

        # requests has no default timeout; without one an unresponsive server
        # would block the executor thread indefinitely.
        timeout_seconds = 30
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: requests.get(info_url, timeout=timeout_seconds)
        )

        if response.status_code != 200:
            logging.error(f"Failed to get botrun info: HTTP {response.status_code}")
            return None

        data = response.json()
        botrun_flow_lang_url = data.get("botrun_flow_lang_url")
        if not botrun_flow_lang_url:
            logging.error("botrun_flow_lang_url not found in API response")
            return None

        logging.info(f"Retrieved webhook base URL: {botrun_flow_lang_url}")
        return botrun_flow_lang_url

    except Exception as e:
        logging.error(f"Error getting webhook base URL: {e}")
        return None
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
async def register_google_drive_webhook(
    doc_link: str, hatch_id: str
) -> Tuple[bool, Optional[str], Optional[str]]:
    """Register a Google Drive webhook for a specific document.

    Validates the credentials configuration, resolves the document ID and the
    webhook base URL, then runs the blocking registration in the default
    executor.

    Args:
        doc_link: Google Doc link or ID.
        hatch_id: The hatch ID to associate with this webhook.

    Returns:
        Tuple of ``(success, channel_id, resource_id)``;
        ``(False, None, None)`` on any failure, which is logged, not raised.
    """
    try:
        credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC")
        if not credentials_path:
            logging.error("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC not set")
            return False, None, None

        if not os.path.exists(credentials_path):
            logging.error(f"Credentials file not found: {credentials_path}")
            return False, None, None

        doc_id = extract_google_doc_id_from_link(doc_link)
        if not doc_id:
            logging.error(f"Unable to extract Google Doc ID from: {doc_link}")
            return False, None, None

        # get_webhook_base_url logs its own failure reason.
        base_url = await get_webhook_base_url()
        if not base_url:
            return False, None, None

        webhook_url = f"{base_url}/api/hatch/webhook/google-drive"

        # Inside a coroutine the running loop is the one to use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            register_google_drive_webhook_sync,
            credentials_path,
            doc_id,
            webhook_url,
            hatch_id,
        )

    except Exception as e:
        logging.error(f"Error registering Google Drive webhook: {e}")
        return False, None, None
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
def register_google_drive_webhook_sync(
    credentials_path: str, doc_id: str, webhook_url: str, hatch_id: str
) -> Tuple[bool, Optional[str], Optional[str]]:
    """Blocking helper that opens a Drive ``files().watch`` channel for a document.

    Args:
        credentials_path: Path to the service-account key file.
        doc_id: ID of the document to watch.
        webhook_url: Address the notifications are delivered to.
        hatch_id: Hatch ID; embedded in the channel ID and sent as the
            channel token so notifications can be identified.

    Returns:
        ``(True, channel_id, resource_id)`` taken from the API response, or
        ``(False, None, None)`` when the call fails (the error is logged).
    """
    try:
        drive_client, _unused_docs = authenticate_google_services(credentials_path)

        # A random UUID suffix keeps the channel ID unique per registration.
        watch_request = {
            "id": f"hatch-{hatch_id}-{uuid.uuid4()}",
            "type": "web_hook",
            "address": webhook_url,
            "token": hatch_id,
        }
        watch_response = (
            drive_client.files().watch(fileId=doc_id, body=watch_request).execute()
        )

        # Report the identifiers exactly as the API echoes them back.
        channel_id = watch_response.get("id")
        resource_id = watch_response.get("resourceId")
        logging.info(
            f"Successfully registered webhook for doc {doc_id}, channel: {channel_id}"
        )
        return True, channel_id, resource_id

    except HttpError as api_err:
        logging.error(f"Google API error registering webhook: {api_err}")
        return False, None, None
    except Exception as err:
        logging.error(f"Error in sync webhook registration: {err}")
        return False, None, None
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
async def unregister_google_drive_webhook(channel_id: str, resource_id: str) -> bool:
    """Unregister a Google Drive webhook.

    Validates the credentials configuration, then runs the blocking stop call
    in the default executor.

    Args:
        channel_id: The channel ID to stop.
        resource_id: The resource ID associated with the channel.

    Returns:
        True if successful, False otherwise (failures are logged, not raised).
    """
    try:
        credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC")
        if not credentials_path:
            logging.error("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC not set")
            return False

        if not os.path.exists(credentials_path):
            logging.error(f"Credentials file not found: {credentials_path}")
            return False

        # Inside a coroutine the running loop is the one to use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            unregister_google_drive_webhook_sync,
            credentials_path,
            channel_id,
            resource_id,
        )

    except Exception as e:
        logging.error(f"Error unregistering Google Drive webhook: {e}")
        return False
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
def unregister_google_drive_webhook_sync(
    credentials_path: str, channel_id: str, resource_id: str
) -> bool:
    """Blocking helper that stops a Drive notification channel.

    Args:
        credentials_path: Path to the service-account key file.
        channel_id: ID of the channel to stop.
        resource_id: Resource ID associated with the channel.

    Returns:
        True once ``channels().stop`` has executed, False when any step raises
        (the error is logged).
    """
    try:
        drive_client, _unused_docs = authenticate_google_services(credentials_path)

        # The stop call identifies the channel by both IDs.
        stop_request = {"id": channel_id, "resourceId": resource_id}
        drive_client.channels().stop(body=stop_request).execute()
    except HttpError as api_err:
        logging.error(f"Google API error unregistering webhook: {api_err}")
        return False
    except Exception as err:
        logging.error(f"Error in sync webhook unregistration: {err}")
        return False

    logging.info(f"Successfully unregistered webhook channel: {channel_id}")
    return True
|
|
1
|
+
import io
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import asyncio
|
|
5
|
+
import logging
|
|
6
|
+
import requests
|
|
7
|
+
import uuid
|
|
8
|
+
from typing import Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import chardet
|
|
11
|
+
from dotenv import load_dotenv
|
|
12
|
+
from google.oauth2 import service_account
|
|
13
|
+
from google.auth.credentials import Credentials
|
|
14
|
+
from googleapiclient.discovery import build
|
|
15
|
+
from googleapiclient.errors import HttpError
|
|
16
|
+
from googleapiclient.http import MediaIoBaseDownload
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
load_dotenv()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def authenticate_google_services(service_account_file: str):
    """Build Drive v3 and Docs v1 API clients from a service-account key file.

    Args:
        service_account_file: Path to the service-account key file.

    Returns:
        A ``(drive_service, docs_service)`` tuple of API client objects.
    """
    # A single credential object, scoped for Drive, Docs and Sheets, is shared
    # by both clients.
    requested_scopes = [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/documents",
        "https://www.googleapis.com/auth/spreadsheets",
    ]
    creds = service_account.Credentials.from_service_account_file(
        service_account_file, scopes=requested_scopes
    )
    drive_client = build("drive", "v3", credentials=creds)
    docs_client = build("docs", "v1", credentials=creds)
    return drive_client, docs_client
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# def service_account_authentication(service_name, version, scopes):
|
|
37
|
+
# service_account_file: str = os.getenv(
|
|
38
|
+
# "GOOGLE_APPLICATION_CREDENTIALS", "./keys/google_service_account_key.json"
|
|
39
|
+
# )
|
|
40
|
+
# credentials: Credentials = service_account.Credentials.from_service_account_file(
|
|
41
|
+
# service_account_file, scopes=scopes
|
|
42
|
+
# )
|
|
43
|
+
# return build(service_name, version, credentials=credentials)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_google_doc_content_with_service(
    file_id: str, mime_type, service, with_decode=True
):
    """Download a Drive file and return its content.

    Native Google Docs are exported as plain text; every other MIME type is
    downloaded as stored.

    Args:
        file_id: ID of the Drive file.
        mime_type: MIME type of the file; only
            ``application/vnd.google-apps.document`` is treated specially.
        service: Drive API client exposing ``files()``.
        with_decode: When true, decode the downloaded bytes to ``str``; when
            false, return the downloaded bytes unchanged.

    Returns:
        ``str`` with a leading BOM removed and CRLF normalised to LF when
        ``with_decode`` is true, otherwise the raw ``bytes``.
    """
    files = service.files()
    if mime_type == "application/vnd.google-apps.document":
        request = files.export_media(fileId=file_id, mimeType="text/plain")
    else:
        # Every non-native type is fetched byte-for-byte.
        request = files.get_media(fileId=file_id)

    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    raw_content = buffer.getvalue()

    if not with_decode:
        # The previous code called bytes.replace() with str arguments here,
        # which always raised TypeError. The payload is returned untouched
        # because rewriting CRLF inside undecoded data could corrupt binary
        # files such as .docx.
        return raw_content

    # chardet reports encoding=None when it cannot decide (e.g. empty input);
    # fall back to UTF-8 instead of passing None to decode().
    encoding = chardet.detect(raw_content)["encoding"] or "utf-8"
    content = raw_content.decode(encoding)
    if content.startswith("\ufeff"):
        content = content[1:]
    return content.replace("\r\n", "\n")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_google_doc_mime_type(file_id: str, drive_service) -> str:
    """
    Look up the MIME type of a Google Drive file.

    Args:
        file_id (str): ID of the Google file.
        drive_service: Drive service object used to query the file metadata.

    Returns:
        str: MIME type of the file, e.g. 'application/vnd.google-apps.document';
            an empty string when the metadata carries no "mimeType" entry.

    Raises:
        HttpError: Re-raised (after printing it) when the metadata request fails.
    """
    try:
        # Request only the mimeType field of the file's metadata.
        metadata_request = drive_service.files().get(
            fileId=file_id, fields="mimeType"
        )
        metadata = metadata_request.execute()
    except HttpError as error:
        print(f"An error occurred: {error}")
        raise

    return metadata.get("mimeType", "")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_sheets_service(service_account_file: str):
    """
    Build a Google Sheets (v4) service object from a service-account key file.

    Args:
        service_account_file (str): Path to the Google service-account key file.

    Returns:
        googleapiclient.discovery.Resource: Google Sheets service object.

    Raises:
        Exception: Any failure while importing the client libraries, loading
            the credentials or building the service is logged and re-raised.
    """
    try:
        # Imported lazily so the module loads without the Google client libs.
        from googleapiclient.discovery import build
        from google.oauth2 import service_account

        sheets_scopes = ["https://www.googleapis.com/auth/spreadsheets"]
        sheets_credentials = service_account.Credentials.from_service_account_file(
            service_account_file,
            scopes=sheets_scopes,
        )
        sheets_service = build("sheets", "v4", credentials=sheets_credentials)
    except Exception as e:
        import logging

        logging.error(f"[Google Sheets] 建立服務物件失敗: {e}")
        raise

    return sheets_service
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def create_sheet_if_not_exists(
    service_account_file: str,
    spreadsheet_id: str,
    sheet_name: str,
    headers: list = None,
):
    """
    Make sure a worksheet exists in a spreadsheet, creating it when missing.

    Args:
        service_account_file (str): Path to the Google service-account key file.
        spreadsheet_id (str): ID of the Google Sheets spreadsheet.
        sheet_name (str): Title of the worksheet.
        headers (list): Optional header row, written to row 1 only when the
            worksheet is newly created.

    Returns:
        bool: True when the worksheet already existed or was created,
            False when any step failed (the error is logged).
    """
    try:
        spreadsheets = get_sheets_service(service_account_file).spreadsheets()

        # Collect the titles of the worksheets that already exist.
        workbook = spreadsheets.get(spreadsheetId=spreadsheet_id).execute()
        existing_titles = [ws["properties"]["title"] for ws in workbook["sheets"]]
        if sheet_name in existing_titles:
            return True  # worksheet already exists

        # Create the missing worksheet.
        add_sheet_request = {"addSheet": {"properties": {"title": sheet_name}}}
        spreadsheets.batchUpdate(
            spreadsheetId=spreadsheet_id, body={"requests": [add_sheet_request]}
        ).execute()

        # Write the header row when one was supplied.
        if headers:
            header_update = spreadsheets.values().update(
                spreadsheetId=spreadsheet_id,
                range=f"{sheet_name}!1:1",
                valueInputOption="RAW",
                body={"values": [headers]},
            )
            header_update.execute()

        return True

    except Exception as e:
        import logging

        logging.error(f"[Google Sheets] 建立工作表失敗: {e}")
        return False
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def get_sheet_content(service_account_file: str, spreadsheet_id: str, sheet_name: str):
    """
    Read a worksheet and return its content organised by column.

    The first row is treated as the header; every following row contributes
    one value per header column, with None where the row is shorter than the
    header.

    Args:
        service_account_file (str): Path to the Google service-account key file.
        spreadsheet_id (str): ID of the Google Sheets spreadsheet.
        sheet_name (str): Title of the worksheet.

    Returns:
        dict: Maps each header name to the list of values in that column;
            an empty dict when the worksheet has no values.

    Raises:
        Exception: Raised with a descriptive message when reading fails.
    """
    try:
        values_api = get_sheets_service(service_account_file).spreadsheets().values()
        response = values_api.get(
            spreadsheetId=spreadsheet_id, range=f"{sheet_name}"
        ).execute()

        table = response.get("values", [])
        if not table:
            return {}

        header = table[0]
        width = len(header)
        columns = {name: [] for name in header}
        for row in table[1:]:
            # Pad short rows with None and ignore cells beyond the header width.
            padded = list(row[:width]) + [None] * (width - len(row))
            for name, cell in zip(header, padded):
                columns[name].append(cell)
        return columns
    except Exception as e:
        raise Exception(f"讀取 Google Sheet 內容失敗: {e}")
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def append_data_to_gsheet(
    service_account_file: str,
    spreadsheet_id: str,
    sheet_name: str,
    data_dict: dict,
    sort_order: str = "new_to_old",
):
    """
    Insert one row of data into a worksheet, newest-first or oldest-first.

    The row is laid out according to the worksheet's header row (row 1);
    columns missing from ``data_dict`` are filled with an empty string.

    Args:
        service_account_file (str): Path to the Google service-account key file.
        spreadsheet_id (str): ID of the Google Sheets spreadsheet.
        sheet_name (str): Title of the worksheet.
        data_dict (dict): Data to insert, keyed by header name.
        sort_order (str): "new_to_old" (default) inserts the row at row 2;
            "old_to_new" appends it after the last row.

    Returns:
        dict: Response of the values update/append API call.

    Raises:
        Exception: Any failure (missing header row, unknown worksheet,
            unsupported ``sort_order``, API errors) is logged and re-raised.
    """
    try:
        spreadsheets = get_sheets_service(service_account_file).spreadsheets()

        # Read the header row to learn the column order.
        header_response = (
            spreadsheets.values()
            .get(spreadsheetId=spreadsheet_id, range=f"{sheet_name}!1:1")
            .execute()
        )
        column_names = header_response.get("values", [[]])[0]
        if not column_names:
            raise Exception(f"工作表 {sheet_name} 沒有標題列")

        # Arrange the incoming data in header order.
        new_row = [data_dict.get(name, "") for name in column_names]

        if sort_order == "new_to_old":
            # Newest first: find the worksheet's sheetId so a row can be inserted.
            workbook = spreadsheets.get(spreadsheetId=spreadsheet_id).execute()
            target_sheet_id = next(
                (
                    ws["properties"]["sheetId"]
                    for ws in workbook["sheets"]
                    if ws["properties"]["title"] == sheet_name
                ),
                None,
            )
            if target_sheet_id is None:
                raise Exception(f"找不到工作表: {sheet_name}")

            # Open up an empty row at row 2 (index 1, 0-based) ...
            blank_row_request = {
                "insertDimension": {
                    "range": {
                        "sheetId": target_sheet_id,
                        "dimension": "ROWS",
                        "startIndex": 1,
                        "endIndex": 2,
                    },
                    "inheritFromBefore": False,
                }
            }
            spreadsheets.batchUpdate(
                spreadsheetId=spreadsheet_id, body={"requests": [blank_row_request]}
            ).execute()

            # ... then write the data into that row.
            write_call = spreadsheets.values().update(
                spreadsheetId=spreadsheet_id,
                range=f"{sheet_name}!2:2",
                valueInputOption="RAW",
                body={"values": [new_row]},
            )
            result = write_call.execute()

        elif sort_order == "old_to_new":
            # Oldest first: append after the last row; A:A lets Sheets locate the end.
            append_call = spreadsheets.values().append(
                spreadsheetId=spreadsheet_id,
                range=f"{sheet_name}!A:A",
                valueInputOption="RAW",
                insertDataOption="INSERT_ROWS",
                body={"values": [new_row]},
            )
            result = append_call.execute()

        else:
            raise ValueError(
                f"不支援的排序方式: {sort_order},請使用 'new_to_old' 或 'old_to_new'"
            )

        return result

    except Exception as e:
        import logging

        logging.error(f"[Google Sheets] 插入資料失敗 (排序方式: {sort_order}): {e}")
        raise
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def extract_google_doc_id_from_link(google_doc_link: str) -> Optional[str]:
    """
    Pull the document ID out of a Google Doc URL, or accept a bare ID.

    The URL patterns are tried in order: ``/document/d/<id>``, ``id=<id>``
    and ``/file/d/<id>``. When none matches, the stripped input is returned
    if it consists solely of letters, digits, '-' and '_'.

    Args:
        google_doc_link: Google Doc URL in one of the supported formats,
            or a bare document ID.

    Returns:
        The document ID when one can be identified, otherwise None.
    """
    if not google_doc_link:
        return None

    url_patterns = (
        r"/document/d/([a-zA-Z0-9-_]+)",
        r"id=([a-zA-Z0-9-_]+)",
        r"/file/d/([a-zA-Z0-9-_]+)",
    )
    # Lazily evaluate the patterns so the first one that matches wins.
    hits = (re.search(url_pattern, google_doc_link) for url_pattern in url_patterns)
    first_hit = next((hit for hit in hits if hit), None)
    if first_hit is not None:
        return first_hit.group(1)

    # Not a recognised URL: maybe the caller passed the ID itself.
    candidate = google_doc_link.strip()
    if re.match(r"^[a-zA-Z0-9-_]+$", candidate):
        return candidate
    return None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
async def fetch_google_doc_content(doc_link: str) -> Optional[str]:
    """
    Fetch the text of a Google Doc using service-account credentials.

    The key-file path is read from the
    GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC environment variable and
    the blocking Google API work is handed to the default executor.

    Args:
        doc_link: Google Doc link or ID.

    Returns:
        Document content as plain text, or None when the configuration is
        missing, the ID cannot be extracted, or the fetch fails.
    """
    try:
        # Validate the service-account configuration first.
        key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC")
        if not key_file:
            logging.error(
                "GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC environment variable not set"
            )
            return None
        if not os.path.exists(key_file):
            logging.error(
                f"Google service account credentials file not found: {key_file}"
            )
            return None

        # Resolve the document ID from whatever the caller supplied.
        document_id = extract_google_doc_id_from_link(doc_link)
        if not document_id:
            logging.error(f"Unable to extract Google Doc ID from link: {doc_link}")
            return None

        # The Google client is synchronous; keep it off the event loop.
        return await asyncio.get_event_loop().run_in_executor(
            None, fetch_google_doc_content_sync, key_file, document_id
        )

    except Exception as e:
        logging.error(f"Error fetching Google Doc content from {doc_link}: {e}")
        return None
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def fetch_google_doc_content_sync(credentials_path: str, doc_id: str) -> Optional[str]:
    """
    Blocking helper that downloads a Google Doc and returns its stripped text.

    Args:
        credentials_path: Path to the service-account key file.
        doc_id: ID of the document to fetch.

    Returns:
        The document text with surrounding whitespace removed, or None when
        the document is empty, is not returned as text, or any error occurs
        (errors are logged).
    """
    try:
        # Only the Drive service is needed; the Docs service is unused here.
        drive_service, _docs_service = authenticate_google_services(credentials_path)

        doc_mime_type = get_google_doc_mime_type(doc_id, drive_service)
        downloaded = get_google_doc_content_with_service(
            doc_id, doc_mime_type, drive_service, with_decode=True
        )

        # Anything that is not a non-empty string counts as "no content".
        text = downloaded.strip() if downloaded and isinstance(downloaded, str) else ""
        if text:
            logging.info(
                f"Successfully fetched Google Doc content (length: {len(text)})"
            )
            return text

        logging.warning(f"Google Doc {doc_id} appears to be empty")
        return None

    except Exception as e:
        logging.error(f"Error in sync Google Doc fetch for {doc_id}: {e}")
        return None
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
async def get_webhook_base_url() -> Optional[str]:
    """
    Get the webhook base URL from the botrun info API.

    Reads the API base from the BOTRUN_BACK_API_BASE environment variable and
    issues ``GET <base>/botrun/info`` in the default executor so the event
    loop is not blocked.

    Returns:
        The ``botrun_flow_lang_url`` value from the API response, or None when
        the environment variable is unset, the request fails or times out,
        the status is not 200, or the field is missing.
    """
    try:
        botrun_back_api_base = os.getenv("BOTRUN_BACK_API_BASE")
        if not botrun_back_api_base:
            logging.error("BOTRUN_BACK_API_BASE environment variable not set")
            return None

        info_url = f"{botrun_back_api_base}/botrun/info"

        loop = asyncio.get_running_loop()
        # requests has no default timeout; without one a stalled backend would
        # pin an executor thread (and this coroutine) indefinitely.
        response = await loop.run_in_executor(
            None, lambda: requests.get(info_url, timeout=30)
        )

        if response.status_code != 200:
            logging.error(f"Failed to get botrun info: HTTP {response.status_code}")
            return None

        botrun_flow_lang_url = response.json().get("botrun_flow_lang_url")
        if not botrun_flow_lang_url:
            logging.error("botrun_flow_lang_url not found in API response")
            return None

        logging.info(f"Retrieved webhook base URL: {botrun_flow_lang_url}")
        return botrun_flow_lang_url

    except Exception as e:
        logging.error(f"Error getting webhook base URL: {e}")
        return None
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
async def register_google_drive_webhook(
    doc_link: str, hatch_id: str
) -> Tuple[bool, Optional[str], Optional[str]]:
    """
    Register a Google Drive change webhook for one document.

    Args:
        doc_link: Google Doc link or ID.
        hatch_id: The hatch ID to associate with this webhook.

    Returns:
        Tuple of (success, channel_id, resource_id); ``(False, None, None)``
        when configuration is missing, the ID or base URL cannot be
        resolved, or registration fails.
    """
    failure = (False, None, None)
    try:
        # Service-account key file must be configured and present.
        key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC")
        if not key_file:
            logging.error("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC not set")
            return failure
        if not os.path.exists(key_file):
            logging.error(f"Credentials file not found: {key_file}")
            return failure

        document_id = extract_google_doc_id_from_link(doc_link)
        if not document_id:
            logging.error(f"Unable to extract Google Doc ID from: {doc_link}")
            return failure

        # The callback address is derived from the botrun info API.
        base_url = await get_webhook_base_url()
        if not base_url:
            return failure
        callback_url = f"{base_url}/api/hatch/webhook/google-drive"

        # The Google client is synchronous; run it in the default executor.
        return await asyncio.get_event_loop().run_in_executor(
            None,
            register_google_drive_webhook_sync,
            key_file,
            document_id,
            callback_url,
            hatch_id,
        )

    except Exception as e:
        logging.error(f"Error registering Google Drive webhook: {e}")
        return failure
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def register_google_drive_webhook_sync(
    credentials_path: str, doc_id: str, webhook_url: str, hatch_id: str
) -> Tuple[bool, Optional[str], Optional[str]]:
    """
    Blocking helper that opens a Drive watch channel for a document.

    Args:
        credentials_path: Path to the service-account key file.
        doc_id: ID of the Drive file to watch.
        webhook_url: Address the notifications are delivered to.
        hatch_id: Hatch ID; embedded in the channel ID and sent as the
            channel token.

    Returns:
        ``(True, channel_id, resource_id)`` taken from the API response on
        success, ``(False, None, None)`` on any error (errors are logged).
    """
    try:
        # Only the Drive service is needed for watch registration.
        drive_service, _docs_service = authenticate_google_services(credentials_path)

        watch_request = {
            # A fresh UUID keeps every channel ID unique per registration.
            "id": f"hatch-{hatch_id}-{uuid.uuid4()}",
            "type": "web_hook",
            "address": webhook_url,
            # The token carries the hatch ID so it can be identified later.
            "token": hatch_id,
        }
        watch_response = (
            drive_service.files().watch(fileId=doc_id, body=watch_request).execute()
        )

        # Report the identifiers as echoed back by the API.
        registered_channel = watch_response.get("id")
        registered_resource = watch_response.get("resourceId")

        logging.info(
            f"Successfully registered webhook for doc {doc_id}, channel: {registered_channel}"
        )
        return True, registered_channel, registered_resource

    except HttpError as e:
        logging.error(f"Google API error registering webhook: {e}")
        return False, None, None
    except Exception as e:
        logging.error(f"Error in sync webhook registration: {e}")
        return False, None, None
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
async def unregister_google_drive_webhook(channel_id: str, resource_id: str) -> bool:
    """
    Stop a previously registered Google Drive webhook channel.

    Args:
        channel_id: The channel ID to stop.
        resource_id: The resource ID associated with the channel.

    Returns:
        True if the channel was stopped, False when the credentials are not
        configured or the request fails.
    """
    try:
        key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC")
        if not key_file:
            logging.error("GOOGLE_APPLICATION_CREDENTIALS_FOR_BOTRUN_DOC not set")
            return False
        if not os.path.exists(key_file):
            logging.error(f"Credentials file not found: {key_file}")
            return False

        # The Google client is synchronous; run it in the default executor.
        return await asyncio.get_event_loop().run_in_executor(
            None,
            unregister_google_drive_webhook_sync,
            key_file,
            channel_id,
            resource_id,
        )

    except Exception as e:
        logging.error(f"Error unregistering Google Drive webhook: {e}")
        return False
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def unregister_google_drive_webhook_sync(
    credentials_path: str, channel_id: str, resource_id: str
) -> bool:
    """
    Blocking helper that stops a Drive watch channel.

    Args:
        credentials_path: Path to the service-account key file.
        channel_id: ID of the channel to stop.
        resource_id: Resource ID associated with the channel.

    Returns:
        True when the stop request succeeds, False on any error (errors are
        logged).
    """
    try:
        # Only the Drive service is needed to stop a channel.
        drive_service, _docs_service = authenticate_google_services(credentials_path)

        stop_request = {"id": channel_id, "resourceId": resource_id}
        drive_service.channels().stop(body=stop_request).execute()

    except HttpError as e:
        logging.error(f"Google API error unregistering webhook: {e}")
        return False
    except Exception as e:
        logging.error(f"Error in sync webhook unregistration: {e}")
        return False
    else:
        logging.info(f"Successfully unregistered webhook channel: {channel_id}")
        return True
|