dts-dance 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dtsdance/dsyncer.py ADDED
@@ -0,0 +1,301 @@
1
+ from .bytecloud import ByteCloudHelper
2
+ from typing import Any, NamedTuple, Tuple, cast, Optional
3
+ from loguru import logger
4
+ import requests
5
+ import re
6
+ from datetime import datetime
7
+
8
+ DSyncer_Task_Detail_URL = "{endpoint}/dsyncer/tasks/all?task_id_list={task_id}"
9
+ DSyncer_Backlog_Dashboard_URL = "https://grafana.sretools.bytedance.net/d/b1cad70c-8ce3-490c-be48-fbcab32c7a2e/dsyncer-backlog?orgId=1&from=now-1h&to=now&timezone=browser&refresh=1m"
10
+ DSyncer_N8N_OPT_URL = "https://n8n.sretools.bytedance.net/webhook/Xt73CZPpKjMu?opt={opt}&env={env}&task_id={task_id}"
11
+
12
+
13
# Per-environment DSyncer settings: ``name`` is the environment name and
# ``token`` is the value sent as ``Authorization: Token <token>`` on
# secret-API calls (see DSyncerHelper._build_headers).
DSyncerEnvInfo = NamedTuple(
    "DSyncerEnvInfo",
    [
        ("name", str),
        ("token", str),
    ],
)
16
+
17
+
18
class DSyncerHelper:
    """Thin client for the DSyncer HTTP API reached through a ByteCloud endpoint.

    Every call resolves the endpoint for ``env`` via ``bytecloud_helper``,
    builds the JWT (and, for ``secret_api`` routes, token) headers and returns
    the decoded JSON body.
    """

    def __init__(self, envs: dict[str, DSyncerEnvInfo], bytecloud_helper: ByteCloudHelper) -> None:
        """
        Args:
            envs: Mapping of environment name to its DSyncer settings.
            bytecloud_helper: Provides ``get_jwt_token(env)`` and ``get_env_info(env)``.
        """
        self.envs = envs
        self.bytecloud_helper = bytecloud_helper

    def _build_headers(self, env: str, secret_api: bool = False) -> dict[str, str]:
        """
        Build the request headers for ``env``.

        Args:
            env: Environment name.
            secret_api: When True, also add the ``Authorization: Token ...``
                header using the token configured for ``env``.

        Returns:
            dict[str, str]: Header dictionary.

        Raises:
            KeyError: ``secret_api`` is True and ``env`` is not in ``self.envs``.
        """
        jwt_token = self.bytecloud_helper.get_jwt_token(env)
        headers = {"X-Jwt-Token": jwt_token, "x-bcgw-vregion": env}

        if secret_api:
            token = self.envs[env].token
            headers["Authorization"] = f"Token {token}"

        return headers

    def _make_request(self, method: str, url: str, headers: dict[str, str], json_data: Optional[dict] = None) -> dict[str, Any]:
        """
        Send an HTTP request and return the decoded JSON body.

        Args:
            method: HTTP method, ``GET`` or ``POST`` (case-insensitive).
            url: Request URL.
            headers: Request headers.
            json_data: JSON body, only sent for ``POST``.

        Returns:
            dict[str, Any]: Parsed JSON response.

        Raises:
            ValueError: ``method`` is neither GET nor POST.
            Exception: Any transport or JSON decoding error, re-raised after logging.
        """
        response = None
        try:
            verb = method.upper()
            if verb == "GET":
                response = requests.get(url, headers=headers)
            elif verb == "POST":
                response = requests.post(url, json=json_data, headers=headers)
            else:
                raise ValueError(f"不支持的 HTTP 方法: {method}")

            # NOTE(review): the HTTP status is not checked here; callers
            # inspect the "code"/"message" fields of the body instead
            # (see get_task_info). Confirm this is intentional.
            return response.json()

        except Exception as e:
            error_msg = f"_make_request occur error, error: {e}"
            if response is not None:
                error_msg += f", response.text: {response.text}"
            logger.warning(error_msg)
            raise

    def _call_api(
        self,
        env: str,
        method: str,
        path: str,
        *,
        secret_api: bool = False,
        json_data: Optional[dict] = None,
    ) -> dict[str, Any]:
        """Resolve the endpoint for ``env``, build headers and call ``path`` on it."""
        env_info = self.bytecloud_helper.get_env_info(env)
        url = f"{env_info.endpoint}{path}"
        headers = self._build_headers(env, secret_api=secret_api)
        return self._make_request(method, url, headers, json_data)

    @staticmethod
    def _parse_dflow_message(message: str) -> tuple[str, str]:
        """Extract ``(dflow_task_url, dflow_task_id)`` from a migrate message, or ``("", "")``."""
        url_match = re.search(r"\[(https://[^\]]+)\]", message)
        id_match = re.search(r"/bytedts/datasync/detail/(\d+)", message)
        if url_match and id_match:
            return (url_match.group(1), id_match.group(1))
        return ("", "")

    @staticmethod
    def _task_succeeded(response_data: dict[str, Any], task_id: str) -> bool:
        """Return True when ``task_id`` is listed in ``data.success_task`` (null-safe)."""
        data = response_data.get("data") or {}
        success_task = data.get("success_task") or []
        return task_id in success_task

    @staticmethod
    def _migrate_error(response_data: dict[str, Any]) -> Optional[str]:
        """Return None when the migrate call succeeded, otherwise a non-empty error text."""
        code = response_data.get("code")
        message = response_data.get("message")
        if code == 0 and message == "ok":
            return None
        # A failure without a message must not collapse to None, because
        # None is how migrate_task reports success.
        return message or f"migrate_task failed, code: {code}"

    def _acquire_task_info(self, env: str, task_id: str) -> dict[str, Any]:
        """
        Fetch the raw DSyncer task info response.

        Args:
            env: Environment name.
            task_id: DSyncer task ID.

        Returns:
            dict[str, Any]: The full decoded JSON response of the taskinfo API.
        """
        return self._call_api(env, "GET", f"/api/v1/dsyncer/openapi/taskinfo/{task_id}/")

    def get_dflow_task_info(self, env: str, task_id: str) -> tuple[str, str]:
        """
        Get the DFlow task that a DSyncer task was migrated to.

        The API answers with a message such as
        "...[https://cloud.bytedance.net/bytedts/datasync/detail/93127366537986?scope=China-North&tabKey=DetailInfo]...";
        the bracketed URL and the numeric ID after ``/detail/`` are extracted.

        Returns:
            tuple[str, str]: ``(dflow_task_url, dflow_task_id)``, or ``("", "")``
            when the message does not contain both.
        """
        json_data = self._call_api(env, "GET", f"/api/v1/dsyncer/openapi/taskinfo/{task_id}/migrate/")

        # A null "message" is treated like a missing one so re.search gets a str.
        message = json_data.get("message") or ""
        logger.debug(f"get task migrate info {env} {task_id}, message: {message}")

        dflow_task_url, dflow_task_id = self._parse_dflow_message(message)
        if dflow_task_url and dflow_task_id:
            logger.info(f"extracted dflow task_url: {dflow_task_url}, task_id: {dflow_task_id}")
            return (dflow_task_url, dflow_task_id)

        logger.warning(f"could not extract dflow task info from message: {message}")
        return ("", "")

    def generate_task_url(self, env: str, task_id: str) -> str:
        """
        Build the URL of the DSyncer task detail page.

        Args:
            env: Environment name.
            task_id: DSyncer task ID.

        Returns:
            str: URL of the DSyncer task detail page.
        """
        env_info = self.bytecloud_helper.get_env_info(env)
        return DSyncer_Task_Detail_URL.format(endpoint=env_info.endpoint, task_id=task_id)

    def generate_task_grafana_url(self, task_id: str, change_time: str) -> str:
        """
        Build the Grafana backlog dashboard URL for a DSyncer task.

        Args:
            task_id: DSyncer task ID.
            change_time: Timestamp formatted as ``%Y-%m-%d %H:%M:%S``. When it
                cannot be parsed, the current time is used instead.

        Returns:
            str: Dashboard URL with ``var-change_time`` (milliseconds) and ``var-task_id``.
        """
        try:
            # The parsed datetime is naive, so timestamp() interprets it in
            # the local timezone of the running process.
            dt = datetime.strptime(change_time, "%Y-%m-%d %H:%M:%S")
            change_time_timestamp = int(dt.timestamp() * 1000)
        except (ValueError, TypeError) as e:
            # TypeError covers a non-string change_time (e.g. None), which
            # should take the same fallback as an unparsable string.
            logger.warning(f"无法解析时间格式 '{change_time}': {e},使用当前时间戳")
            change_time_timestamp = int(datetime.now().timestamp() * 1000)

        return f"{DSyncer_Backlog_Dashboard_URL}&var-change_time={change_time_timestamp}&var-task_id={task_id}"

    def get_task_info(self, env: str, task_id: str) -> dict[str, Any]:
        """
        Get a trimmed view of a DSyncer task.

        Args:
            env: Environment name.
            task_id: DSyncer task ID.

        Returns:
            dict[str, Any]: ``task_id``, ``status``, ``desc``, ``scene``, ``psm``,
            ``increment_task_id`` and ``mq_info`` (``cluster``/``topic``/``group``);
            fields missing from the response default to ``""``.

        Raises:
            Exception: The API answered with code 400 and a message, or the
                response body does not have the expected shape.
        """
        json_data = self._acquire_task_info(env, task_id)
        if json_data.get("code") == 400 and "message" in json_data:
            raise Exception(json_data.get("message", "未知错误"))

        logger.info(f"get_task_info success {env} {task_id}")

        try:
            data = cast(dict, json_data.get("data", {}))
            increment_task = cast(dict, data.get("increment_task", {}))
            rocket_mq_connection = cast(dict, increment_task.get("rocket_mq_connection", {}))
            return {
                "task_id": data.get("task_id", ""),
                "status": data.get("status", ""),
                "desc": data.get("desc", ""),
                "scene": data.get("scene", ""),
                "psm": data.get("psm", ""),
                "increment_task_id": increment_task.get("task_id", ""),
                "mq_info": {
                    "cluster": rocket_mq_connection.get("cluster", ""),
                    "topic": rocket_mq_connection.get("topic", ""),
                    "group": rocket_mq_connection.get("group", ""),
                },
            }
        except Exception as e:
            raise Exception(f"无法从响应中提取 DSyncer 任务状态数据: {str(e)}") from e

    def is_task_migrate_running(self, env: str, task_id: str) -> Tuple[bool, str]:
        """
        Check whether a DSyncer task is currently being migrated.

        Returns:
            Tuple[bool, str]: ``(True, "")`` when the response message contains
            "task migrate is running"; otherwise ``(False, msg)`` where ``msg``
            is ``data.msg`` from the response, or ``""`` when absent.
        """
        response_data = self._call_api(
            env,
            "POST",
            "/api/v1/dsyncer/secret_api/task/migrate/check",
            secret_api=True,
            json_data={"task_id": task_id},
        )

        message = response_data.get("message") or ""
        logger.debug(f"get task migrate status {env} {task_id}, message: {message}")
        if "task migrate is running" in message:
            return True, ""

        data = response_data.get("data") or {}
        return False, data.get("msg") or ""

    def migration_rollback(self, env: str, task_id: str) -> bool:
        """
        Roll a migrated task back to DSyncer.

        Returns:
            bool: True when ``task_id`` appears in ``data.success_task`` of the response.
        """
        response_data = self._call_api(
            env,
            "POST",
            "/api/v1/dsyncer/secret_api/task/rollback_migrate2dsyncer/",
            secret_api=True,
            json_data={"task_id_list": [task_id]},
        )

        logger.debug(f"migration_mark_rollback return {env} {task_id}, json_data: {response_data}")
        return self._task_succeeded(response_data, task_id)

    def migration_mark_success(self, env: str, task_id: str) -> bool:
        """
        Mark a task's migration as successful.

        Returns:
            bool: True when ``task_id`` appears in ``data.success_task`` of the response.
        """
        response_data = self._call_api(
            env,
            "POST",
            "/api/v1/dsyncer/secret_api/task/mark_migrate_success/",
            secret_api=True,
            json_data={"task_id": task_id},
        )

        logger.debug(f"migration_mark_success return {env} {task_id}, json_data: {response_data}")
        return self._task_succeeded(response_data, task_id)

    def migrate_task(self, env: str, task_id: str, app_parallel: int) -> Optional[str]:
        """
        Migrate a task to DFlow.

        Args:
            env: Environment name.
            task_id: DSyncer task ID.
            app_parallel: Value sent as ``app_parallel`` in the request body.

        Returns:
            Optional[str]: None on success (``code == 0`` and ``message == "ok"``),
            otherwise a non-empty error message.
        """
        json_data = {
            "task_id": task_id,
            "delay_threshold": "20s",
            "dsyncer_task_pause_threshold": "180s",
            "dflow_delay_threshold": "1m",
            "dflow_diff_threshold": 100,
            "check_minutes": 10,
            "enable_psm": True,
            "disable_check": True,
            "app_parallel": app_parallel,
        }

        response_data = self._call_api(
            env,
            "POST",
            "/api/v1/dsyncer/secret_api/task/migrate2dflow/single",
            secret_api=True,
            json_data=json_data,
        )

        logger.debug(f"migrate_task return {env} {task_id}, json_data: {response_data}")
        return self._migrate_error(response_data)
@@ -0,0 +1,107 @@
1
+ from datetime import datetime, timedelta
2
+ import threading
3
+ from typing import Optional
4
+ from loguru import logger
5
+ import requests
6
+
7
+ URL_FEISHU_OPEN_API: str = "https://fsopen.bytedance.net/open-apis"
8
+
9
+
10
class FeishuBase:
    """Feishu base client: holds the bot credentials and caches the tenant_access_token.

    The token is fetched lazily and re-fetched once the locally recorded
    expiry time has passed; ``get_access_token`` serializes that check with a lock.
    """

    def __init__(self, bot_app_id: str, bot_app_secret: str):
        """
        Args:
            bot_app_id: App ID of the Feishu bot.
            bot_app_secret: App secret of the Feishu bot.
        """
        self.tenant_access_token: Optional[str] = None
        self.token_expire_time: Optional[datetime] = None
        self._token_lock = threading.Lock()

        self.bot_app_id = bot_app_id
        self.bot_app_secret = bot_app_secret

    def _get_tenant_access_token(self) -> str:
        """
        Request a fresh tenant_access_token and record when it should be refreshed.

        Returns:
            The tenant_access_token string.

        Raises:
            Exception: The HTTP request failed, the API returned a non-zero
                ``code``, or the response carried no token.
        """
        url = f"{URL_FEISHU_OPEN_API}/auth/v3/tenant_access_token/internal"
        payload = {"app_id": self.bot_app_id, "app_secret": self.bot_app_secret}
        headers = {"Content-Type": "application/json"}

        try:
            response = requests.post(url, json=payload, headers=headers)
            response.raise_for_status()
            result = response.json()
            if result.get("code") != 0:
                raise Exception(f"获取tenant_access_token失败: {result.get('msg')}")

            token = result.get("tenant_access_token")
            if not token:
                # Without this guard a missing token would be returned as
                # None and later sent as "Bearer None".
                raise Exception("获取tenant_access_token失败: 响应中缺少tenant_access_token")

            expire_in = result.get("expire", 7200)  # fall back to 2 hours

            # Refresh five minutes before the reported expiry.
            self.token_expire_time = datetime.now() + timedelta(seconds=expire_in - 300)

            logger.info(f"成功获取tenant_access_token,过期时间: {self.token_expire_time}")
            return token

        except requests.RequestException as e:
            logger.warning(f"请求tenant_access_token失败: {e}")
            raise Exception(f"网络请求失败: {e}") from e
        except Exception as e:
            logger.warning(f"获取tenant_access_token失败: {e}")
            raise

    def get_access_token(self) -> str:
        """
        Return a valid access token, fetching a new one when needed.

        Returns:
            A tenant_access_token that has not passed its recorded refresh time.
        """
        with self._token_lock:
            needs_refresh = (
                self.tenant_access_token is None
                or self.token_expire_time is None
                or datetime.now() >= self.token_expire_time
            )
            if needs_refresh:
                logger.info("Token不存在或已过期,重新获取")
                self.tenant_access_token = self._get_tenant_access_token()

            return self.tenant_access_token

    @staticmethod
    def _with_auth_header(headers: Optional[dict], token: str) -> dict:
        """Return a new header dict carrying the bearer token; never mutates ``headers``."""
        merged = dict(headers) if headers is not None else {"Content-Type": "application/json"}
        merged["Authorization"] = f"Bearer {token}"
        return merged

    def make_request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
        """
        Send an authenticated request to the Feishu open API.

        Args:
            method: HTTP method.
            endpoint: API path appended to the open-API base URL.
            **kwargs: Extra arguments forwarded to ``requests.request``. A
                ``headers`` dict, if given, is copied before the
                ``Authorization`` header is added.

        Returns:
            The ``requests.Response`` (status already checked).

        Raises:
            requests.RequestException: The request failed or returned an error status.
        """
        url = f"{URL_FEISHU_OPEN_API}{endpoint}"

        token = self.get_access_token()

        # Copy rather than mutate: the caller's dict (possibly shared between
        # calls) must not be left holding a token.
        kwargs["headers"] = self._with_auth_header(kwargs.get("headers"), token)

        try:
            response = requests.request(method, url, **kwargs)
            response.raise_for_status()
            return response
        except requests.RequestException as e:
            logger.warning(f"API请求失败: {method} {url}, error: {e}")
            raise
@@ -0,0 +1,224 @@
1
+ from datetime import datetime
2
+ import threading
3
+ from typing import Callable, Optional, Dict, Any
4
+ from loguru import logger
5
+ import requests
6
+
7
+ from .feishu_base import FeishuBase
8
+
9
+ URL_FEISHU_OPEN_API: str = "https://fsopen.bytedance.net/open-apis"
10
+
11
+
12
class FeishuTable:
    """Access to one Feishu Bitable table; all requests go through a ``FeishuBase``."""

    def __init__(self, feishu_base: FeishuBase, table_app_token: str, table_id: str):
        """
        Args:
            feishu_base: Authenticated client used for every request.
            table_app_token: App token of the Bitable app.
            table_id: ID of the table inside that app.
        """
        # NOTE(review): these three attributes are not read anywhere in this
        # class (token handling lives in feishu_base); kept in case external
        # code accesses them.
        self.tenant_access_token: Optional[str] = None
        self.token_expire_time: Optional[datetime] = None
        self._token_lock = threading.Lock()

        self.feishu_base = feishu_base
        self.table_app_token = table_app_token
        self.table_id = table_id

    def get_app_table_record_id(self, table_view_id: str, task_id: str) -> Optional[str]:
        """
        Look up the record whose "任务id" field equals ``task_id``.

        Args:
            table_view_id: View to search in.
            task_id: Task ID to match.

        Returns:
            The ``record_id`` of the first match, or None when nothing matched
            or the request failed (failures are logged, not raised).
        """
        payload = {
            "view_id": table_view_id,
            "filter": {"conditions": [{"field_name": "任务id", "operator": "is", "value": [task_id]}], "conjunction": "and"},
        }

        try:
            endpoint = f"/bitable/v1/apps/{self.table_app_token}/tables/{self.table_id}/records/search"
            response = self.feishu_base.make_request("POST", endpoint, json=payload)
            result = response.json()
            if result.get("code") != 0:
                logger.warning(f"get_app_table_record_id {task_id} 失败: {result}")
                return None

            data = result.get("data", {})
            items = data.get("items", [])
            if not items:
                logger.warning(f"get_app_table_record_id {task_id} 失败: items为空")
                return None

            record_id = items[0].get("record_id")
            logger.info(f"成功查询到任务ID {task_id} 对应的记录ID: {record_id}")

            return record_id

        except Exception as e:
            logger.warning(f"查询 {task_id} 表格记录失败: {e}")
            return None

    def update_app_table_record(self, record_id: str, fields: Dict[str, Any]) -> bool:
        """
        Update one record of the table.

        Args:
            record_id: Record ID.
            fields: Fields to update, as ``{"field name": value}``.

        Returns:
            True when the API reported success, False otherwise (failures are
            logged, not raised).
        """
        payload = {"fields": fields}

        try:
            endpoint = f"/bitable/v1/apps/{self.table_app_token}/tables/{self.table_id}/records/{record_id}"
            response = self.feishu_base.make_request("PUT", endpoint, json=payload)
            result = response.json()
            if result.get("code") != 0:
                logger.warning(f"更新表格记录失败: {result.get('msg')}")
                return False

            logger.info(f"成功更新记录ID {record_id},字段: {fields}")
            return True

        except Exception as e:
            logger.warning(f"更新表格记录失败: {e}")
            return False

    def fetch_records(self, table_view_id: str, field_names: list[str], page_size: int = 100, page_token: Optional[str] = None) -> Dict[str, Any]:
        """
        Fetch one page of records from a view.

        Args:
            table_view_id: View to read from.
            field_names: Names of the fields to return.
            page_size: Records per page (at most 500).
            page_token: Pagination token from the previous page, if any.

        Returns:
            The ``data`` object of the API response (``{}`` when absent).

        Raises:
            requests.RequestException: The HTTP request failed.
            Exception: The API returned a non-zero ``code``.
        """
        endpoint = f"/bitable/v1/apps/{self.table_app_token}/tables/{self.table_id}/records/search"
        params: Dict[str, Any] = {"page_size": page_size, "user_id_type": "open_id"}
        if page_token:
            params["page_token"] = page_token

        payload = {"field_names": field_names, "view_id": table_view_id}

        try:
            response = self.feishu_base.make_request("POST", endpoint, json=payload, params=params)
            result = response.json()
            if result.get("code") != 0:
                raise Exception(f"API请求失败: {result.get('msg')}")

            return result.get("data", {})

        except requests.RequestException as e:
            logger.error(f"请求失败: {e}")
            raise
        except Exception as e:
            logger.error(f"获取记录失败: {e}")
            raise

    def parse_record(self, record: Dict[str, Any], field_names: Optional[list[str]] = None) -> Dict[str, Any]:
        """
        Flatten one record into ``{"record_id": ..., field: value, ...}``.

        Args:
            record: A record as returned by the search API.
            field_names: Fields to extract; when None or empty, every field
                present in the record is extracted.

        Returns:
            The record ID plus one entry per field. A non-empty list value is
            reduced to its first element (its ``text`` when that element is a
            dict with a ``text`` key, otherwise ``str(element)``). Any other
            value is passed through unchanged, and a requested field missing
            from the record yields ``[]``.
        """
        fields = record.get("fields", {})
        result: Dict[str, Any] = {"record_id": record.get("record_id", "")}

        target_fields = field_names if field_names else list(fields.keys())

        for field_name in target_fields:
            field_value = fields.get(field_name, [])

            if isinstance(field_value, list) and field_value:
                first = field_value[0]
                if isinstance(first, dict) and "text" in first:
                    result[field_name] = first.get("text", "")
                else:
                    result[field_name] = str(first)
            else:
                # Empty or non-list values are kept as they are.
                result[field_name] = field_value

        return result

    def loop_all(
        self, table_view_id: str, field_names: list[str], callback: Optional[Callable[[Dict[str, str]], None]] = None, limit: Optional[int] = None
    ):
        """
        Walk through every record of a view, page by page.

        Args:
            table_view_id: View to read from.
            field_names: Names of the fields to request.
            callback: Optional function called with each parsed record. When
                omitted, the record's ``task_id`` value is logged instead.
            limit: Optional maximum number of records to process; ``None`` or
                ``0`` means no limit.

        A failure while fetching a page stops the walk; a failure while
        processing a single record is logged and the walk continues.
        """
        logger.info("开始获取飞书多维表格数据...")

        page_count = 0
        total_count = 0
        processed_count = 0
        page_token = None

        while True:
            page_count += 1
            logger.info(f"正在获取第 {page_count} 页数据...")

            try:
                data = self.fetch_records(table_view_id, field_names, page_size=200, page_token=page_token)
                items = data.get("items", [])
                size = len(items)
                total_count += size
                page_token = data.get("page_token")
                logger.info(f"第 {page_count} 页获取到 {size} 条记录,next page_token: {page_token}")

                for item in items:
                    # Check the limit first so no "processing record N" line
                    # is logged for a record that will not be processed.
                    if limit and processed_count >= limit:
                        logger.info(f"已达到处理限制 {limit},停止处理")
                        break

                    logger.info(f"正在处理第 {processed_count + 1} 条数据...")

                    try:
                        parsed_record = self.parse_record(item)
                        processed_count += 1

                        if callback:
                            callback(parsed_record)
                        else:
                            # Default behaviour: log the task ID. .get() is
                            # used because parse_record only emits the fields
                            # present in the record, so "task_id" may be absent.
                            task_id = parsed_record.get("task_id", "")
                            logger.info(f"任务ID: {task_id}")

                    except Exception as e:
                        logger.error(f"处理记录失败: {e}, 记录: {item}")

                # Leave the paging loop as well once the limit is reached.
                if limit and processed_count >= limit:
                    break

                # Stop when the API reports no further pages.
                if not data.get("has_more", False) or not page_token:
                    break

            except Exception as e:
                logger.error(f"获取第 {page_count} 页数据失败: {e}")
                break

        logger.info(f"总共获取到 {total_count} 条记录,成功处理 {processed_count} 条")
        logger.info("数据遍历完成")