alibaba-cloud-ops-mcp-server 0.9.9__py3-none-any.whl → 0.9.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,748 @@
1
+ import re
2
+ import logging
3
+
4
+ from alibaba_cloud_ops_mcp_server.tools.api_tools import _tools_api_call
5
+ from pathlib import Path
6
+
7
+ from pydantic import Field
8
+ from typing import Optional, Tuple
9
+ import json
10
+ import time
11
+ from alibabacloud_oos20190601.client import Client as oos20190601Client
12
+ from alibabacloud_oos20190601 import models as oos_20190601_models
13
+ from alibabacloud_ecs20140526 import models as ecs_20140526_models
14
+ from alibabacloud_ecs20140526.client import Client as ecs20140526Client
15
+ from alibaba_cloud_ops_mcp_server.tools import oss_tools
16
+ from alibaba_cloud_ops_mcp_server.alibabacloud.utils import (
17
+ ensure_code_deploy_dirs,
18
+ load_application_info,
19
+ save_application_info,
20
+ get_release_path,
21
+ create_client,
22
+ create_ecs_client,
23
+ put_bucket_tagging,
24
+ find_bucket_by_tag,
25
+ get_or_create_bucket_for_code_deploy,
26
+ set_project_path,
27
+ )
28
+
29
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Region used for create_client() and as region_id on every OOS application /
# application-group request built in this module.
APPLICATION_MANAGEMENT_REGION_ID = 'cn-hangzhou'
# Status strings; END_STATUSES is what OOS_GetDeployStatus compares the
# application group's status against.
# NOTE(review): DEPLOYING_STATUSES is not referenced in the visible code.
DEPLOYING_STATUSES = ['Deploying', 'Releasing']
SUCCESS_STATUSES = ['Deployed', 'Released']
FAILED_STATUSES = ['DeployFailed', 'ReleaseFailed']
END_STATUSES = SUCCESS_STATUSES + FAILED_STATUSES

# Functions collected by the _append_tool decorator.
tools = []
38
+
39
+
40
def _append_tool(func):
    """Decorator: record ``func`` in the module-level ``tools`` list.

    The function itself is handed back untouched, so the decorated name keeps
    referring to the original callable.
    """
    tools.append(func)
    return func
43
+
44
+
45
@_append_tool
def OOS_CodeDeploy(
        name: str = Field(description='name of the application'),
        deploy_region_id: str = Field(description='Region ID for deployment'),
        application_group_name: str = Field(description='name of the application group'),
        region_id_oss: str = Field(description='OSS region ID'),
        object_name: str = Field(description='OSS object name'),
        file_path: str = Field(description='Local file path to upload. If the file is not in '
                                           '.code_deploy/release directory, it will be copied there.'),
        is_internal_oss: bool = Field(description='Whether to download OSS files through internal network. Note: '
                                                  'If you choose internal network download, you must ensure that '
                                                  'the ECS to be deployed and OSS are in the same region.'),
        application_start: str = Field(
            description='Application start command script. IMPORTANT: If the uploaded artifact '
                        'is a tar archive or compressed package (e.g., .tar, .tar.gz, .zip), '
                        'you MUST first extract it and navigate into the corresponding directory'
                        ' before executing the start command. The start command must correspond '
                        'to the actual structure of the extracted artifact. For example, if you '
                        'upload a tar.gz file containing a Java application, first extract it '
                        'with "tar -xzf <filename>.tar.gz", then cd into the extracted '
                        'directory, and then run the start command (e.g., "java -jar app.jar" '
                        'or "./start.sh"). Ensure the start command matches the actual '
                        'executable or script in the extracted artifact to avoid deployment '
                        'failures.'),
        application_stop: str = Field(description='Application stop command script'),
        deploy_language: str = Field(description='Deploy language, like:docker, java, python, nodejs, golang'),
        port: int = Field(description='Application listening port'),
        project_path: Optional[str] = Field(description='Root path of the project. The .code_deploy '
                                                        'directory will be created in this path. '
                                                        'If not provided, will try to infer from file_path '
                                                        'or use current working directory.'),
        instance_ids: list = Field(description='AlibabaCloud ECS instance ID List. If empty or not provided, user '
                                               'will be prompted to create ECS instances.', default=None)

):
    """
    Deploy an application to ECS instances through the application management API.

    Complete deployment workflow (before calling this tool):

    Step 1: Identify the deployment method
    - Use the local file operation tools to read the project files (package.json, requirements.txt, pom.xml, etc.)
    - Identify the project's deployment method and technology stack (npm, python, java, go, etc.); the project's
      deploy language can be passed in as the parameter: deploy_language
    - Generate the build command. Note: the build command does not require a build script; never add an sh file
      because of it, under any circumstances, since the build command is a CodeDeploy parameter and no file
      needs to be generated

    Step 2: Build or compress the files, and record the file path
    - Run the build command locally to produce the deployment artifact (a compressed package such as tar.gz or zip)
    - Save the build artifact under the .code_deploy/release directory
    - Record the file path for later use by CodeDeploy

    Step 3: Call this tool to deploy
    - This tool calls, in order: CreateApplication (if it does not exist), CreateApplicationGroup (if it does
      not exist), TagResources (optional; when existing resources need to be tagged to import them into the
      application group), DeployApplicationGroup

    Important notes:
    1. The start script (application_start) must correspond to the uploaded artifact. If the artifact is a
       compressed package (tar, tar.gz, zip, etc.), it must first be extracted and the corresponding directory
       entered before the start command is executed.
    2. Example: if app.tar.gz is uploaded, the start script should look like the following; the archive is
       normally in the current directory, so it can be extracted directly:
       "tar -xzf app.tar.gz && ./start.sh"
       Or, if the extracted content is a Java application:
       "tar -xzf app.tar.gz && java -jar app.jar"
    3. Make sure the start command can correctly find and execute the extracted executable or script, to avoid
       deployment failures. The start command should run the program in the background and print logs to a
       specified file. Use non-interactive commands, such as unzip -o which overwrites automatically, so that
       no interaction is needed.
       For example:
       - npm program example:
         "tar -xzf app.tar.gz && nohup npm start > /root/app.log 2>&1 &"
         Or write standard output and the error log separately:
         "tar -xzf app.tar.gz && nohup npm start > /root/app.log 2> /root/app.error.log &"
       - Java program example:
         "tar -xzf app.tar.gz && nohup java -jar app.jar > /root/app.log 2>&1 &"
       - Python program example:
         "tar -xzf app.tar.gz && nohup python app.py > /root/app.log 2>&1 &"
       Explanation: nohup keeps the program running in the background even if the terminal is closed; >
       redirects standard output to the log file; 2>&1 redirects standard error to the same file; the & symbol
       runs the command in the background.
    4. The application and the application group are checked for existence automatically; if they exist,
       creation is skipped to avoid duplicate-creation errors.
    5. If no ECS instance ID is provided, the tool returns a prompt guiding the user to the ECS console to
       create instances.
    6. After deployment, the deployment information is saved to the .code_deploy/.application.json file under
       the project root.
    7. The project_path parameter specifies the project root; the .code_deploy directory is created under this
       path. If it is not provided, it is inferred from file_path, or the current working directory is used.

    After creation is complete, you should show the service link you obtained to the user in markdown form so
    that the user can jump to it.

    Returns:
        A dict. When no instance IDs are given, or some do not exist, the dict carries an 'error' key
        ('ECS_INSTANCE_REQUIRED' / 'ECS_INSTANCE_NOT_FOUND') plus markdown 'instructions' and nothing is
        deployed. Otherwise it carries the DeployApplicationGroup 'response', console links, the port, the
        deploy region and markdown security-group instructions.

    Raises:
        FileNotFoundError: if file_path does not exist (checked after the ECS instance validation).
    """
    # Set project path if provided
    if project_path:
        set_project_path(project_path)
        logger.info(f"[code_deploy] Project path set to: {project_path}")
    else:
        # Try to infer project path from file_path
        file_path_obj = Path(file_path)
        if not file_path_obj.is_absolute():
            file_path_obj = Path.cwd() / file_path_obj
        file_path_resolved = file_path_obj.resolve()

        # Try to find project root by looking for common project files
        current_dir = file_path_resolved.parent
        project_root = None
        project_indicators = ['package.json', 'pom.xml', 'requirements.txt', 'go.mod', 'Cargo.toml', '.git']

        # Search up to 5 levels for project root
        for _ in range(5):
            if any((current_dir / indicator).exists() for indicator in project_indicators):
                project_root = current_dir
                break
            parent = current_dir.parent
            if parent == current_dir:  # Reached filesystem root
                break
            current_dir = parent

        if project_root:
            set_project_path(str(project_root))
            logger.info(f"[code_deploy] Inferred project path from file_path: {project_root}")
        else:
            # Use the directory containing the file as project root
            set_project_path(str(file_path_resolved.parent))
            logger.info(f"[code_deploy] Using file directory as project path: {file_path_resolved.parent}")

    # Check ECS instance ID
    # Without instance IDs nothing is deployed: a structured "error" payload with
    # console links is returned instead of raising.
    if not instance_ids or len(instance_ids) == 0:
        ecs_purchase_link = f'https://ecs-buy.aliyun.com/ecs#/custom/prepay/{deploy_region_id}?orderSource=buyWizard-console-list'
        security_group_link = f'https://ecs.console.aliyun.com/securityGroup?regionId={deploy_region_id}'
        port_info = f'port {port}' if port else 'application port'
        return {
            'error': 'ECS_INSTANCE_REQUIRED',
            'message': f'ECS instance ID not provided. Please create ECS instances first before deployment.',
            'region_id': deploy_region_id,
            'ecs_purchase_link': ecs_purchase_link,
            'security_group_link': security_group_link,
            'instructions': f'''
## ECS Instance Creation Required

**Deployment Region**: {deploy_region_id}

### Step 1: Create ECS Instances
Please visit the following link to create ECS instances:
[{ecs_purchase_link}]({ecs_purchase_link})

After creation, please provide the ECS instance ID list.

### Step 2: Configure Security Group (Post-deployment Operation)
After deployment, you need to open {port_info} for the ECS instance's security group. Please visit:
[{security_group_link}]({security_group_link})

Add inbound rules in the security group rules:
- Port range: {port}/{port} (if port is specified)
- Protocol type: TCP
- Authorized object: 0.0.0.0/0 (or restrict access source as needed)
'''
        }

    # Verify that the ECS instances exist
    logger.info(f"[code_deploy] Validating ECS instances: {instance_ids}")
    all_exist, missing_instance_ids = _check_ecs_instances_exist(deploy_region_id, instance_ids)
    if not all_exist:
        return {
            'error': 'ECS_INSTANCE_NOT_FOUND',
            'message': f'Some ECS instances do not exist in region {deploy_region_id}.',
            'region_id': deploy_region_id,
            'missing_instance_ids': missing_instance_ids,
            'provided_instance_ids': instance_ids,
            'instructions': f'''
## ECS Instance Validation Failed

**Deployment Region**: {deploy_region_id}

**Missing Instance IDs**: {', '.join(missing_instance_ids)}

**All Provided Instance IDs**: {', '.join(instance_ids)}

Please verify that:
1. The instance IDs are correct
2. The instances exist in region {deploy_region_id}
3. You have permission to access these instances

You can check your instances at:
https://ecs.console.aliyun.com/?regionId={deploy_region_id}#/server/instance
'''
        }

    # NOTE(review): presumably creates the .code_deploy working directories under
    # the project path set above - confirm in alibabacloud.utils.
    ensure_code_deploy_dirs()

    # Process file path: if file is not in release directory, copy it to release directory
    file_path_obj = Path(file_path)
    if not file_path_obj.is_absolute():
        file_path_obj = Path.cwd() / file_path_obj

    # Check if file exists
    if not file_path_obj.exists():
        raise FileNotFoundError(f"File does not exist: {file_path_obj}")

    # Normalize path (resolve Windows path case and separator issues)
    file_path_resolved = file_path_obj.resolve()
    release_path = get_release_path(file_path_obj.name)
    release_path_resolved = release_path.resolve()

    # If file is not in release directory, copy it there (using Path object comparison, cross-platform compatible)
    if file_path_resolved != release_path_resolved:
        import shutil
        shutil.copy2(file_path_resolved, release_path_resolved)
        logger.info(f"[code_deploy] Copied file from {file_path_resolved} to {release_path_resolved}")
        # From here on the copy in the release directory is what gets uploaded.
        file_path = str(release_path_resolved)
    else:
        logger.info(f"[code_deploy] File already in release directory: {file_path}")

    # Log input parameters
    logger.info(f"[code_deploy] Input parameters: name={name}, deploy_region_id={deploy_region_id}, "
                f"application_group_name={application_group_name}, instance_ids={instance_ids}, "
                f"region_id_oss={region_id_oss}, object_name={object_name}, "
                f"is_internal_oss={is_internal_oss}, port={port}")

    # Upload file to OSS
    bucket_name = get_or_create_bucket_for_code_deploy(name, region_id_oss)
    logger.info(f"[code_deploy] Auto selected/created bucket: {bucket_name}")

    put_object_resp = oss_tools.OSS_PutObject(
        BucketName=bucket_name,
        ObjectKey=object_name,
        FilePath=file_path,
        RegionId=region_id_oss,
        ContentType="application/octet-stream",
    )
    # The uploaded object's version id is forwarded to the deploy request builders
    # below; .get() yields None when the response has no 'version_id' key.
    version_id = put_object_resp.get('version_id')
    logger.info(f"[code_deploy] Put Object Response: {put_object_resp}")

    # The OOS client always targets APPLICATION_MANAGEMENT_REGION_ID, independent of deploy_region_id.
    client = create_client(region_id=APPLICATION_MANAGEMENT_REGION_ID)

    if not _check_application_exists(client, name):
        logger.info(f"[code_deploy] Application '{name}' does not exist, creating it...")
        alarm_config = oos_20190601_models.CreateApplicationRequestAlarmConfig()
        create_application_request = oos_20190601_models.CreateApplicationRequest(
            region_id=APPLICATION_MANAGEMENT_REGION_ID,
            name=name,
            alarm_config=alarm_config
        )
        client.create_application(create_application_request)
        logger.info(f"[code_deploy] Application '{name}' created successfully")
    else:
        logger.info(f"[code_deploy] Application '{name}' already exists, skipping creation")

    # A new group is created and deployed from full deploy parameters; an existing
    # group gets a new deploy revision instead. Note that is_internal_oss, port and
    # deploy_language are only passed on the new-group path.
    if not _check_application_group_exists(client, name, application_group_name):
        deploy_request = _handle_new_application_group(client, name, application_group_name,
                                                       deploy_region_id, region_id_oss, bucket_name,
                                                       object_name, version_id, is_internal_oss,
                                                       port, instance_ids, application_start,
                                                       application_stop, deploy_language)
    else:
        deploy_request = _handle_existing_application_group(name, application_group_name,
                                                            deploy_region_id, region_id_oss, bucket_name,
                                                            object_name, version_id, application_start,
                                                            application_stop, instance_ids)

    response = client.deploy_application_group(deploy_request)
    logger.info(f"[code_deploy] Response: {json.dumps(str(response), ensure_ascii=False)}")

    # Save deployment info to .application.json
    deploy_info = {
        'last_deployment': {
            'application_name': name,
            'application_group_name': application_group_name,
            'deploy_region_id': deploy_region_id,
            'port': port,
            'instance_ids': instance_ids,
            # Local time without timezone information.
            'deploy_time': time.strftime('%Y-%m-%d %H:%M:%S')
        }
    }
    save_application_info(deploy_info)
    service_link = f'https://ecs.console.aliyun.com/app/detail?tabKey=overview&appName={name}&groupName={application_group_name}'
    security_group_link = f'https://ecs.console.aliyun.com/securityGroup?regionId={deploy_region_id}'

    return {
        'response': response,
        'service_link': service_link,
        'security_group_link': security_group_link,
        'port': port,
        'deploy_region_id': deploy_region_id,
        'security_group_instructions': f'''
## Deployment Successful!

**Service Link**: [View Deployment Details]({service_link})

### Important: Configure Security Group Rules

After the application is deployed, you need to open port **{port}** for the ECS instance's security group, otherwise the application will not be accessible from outside.

**Security Group Management Link**: [{security_group_link}]({security_group_link})

**Configuration Steps**:
1. Visit the security group management link above
2. Find the security group to which your ECS instance belongs
3. Click "Configure Rules" → "Add Security Group Rule"
4. Configure inbound rule:
- **Port range**: {port}/{port}
- **Protocol type**: TCP
- **Authorized object**: 0.0.0.0/0 (allow all sources, or restrict access source as needed)
- **Description**: Application port {port}

After configuration, the application can be accessed via the ECS instance's public IP and port {port}.
'''
    }
341
+
342
+
343
@_append_tool
def OOS_GetLastDeploymentInfo(
        random_string: Optional[str] = Field(default=None, description='')
):
    """
    Return the information stored about the most recent deployment.

    The stored application info is loaded with load_application_info() and its
    'last_deployment' entry is returned under the 'info' key. When that entry is
    missing or empty, 'info' is an empty dict and 'message' says nothing was found.
    The random_string argument is not used.
    """
    logger.info("[GetLastDeploymentInfo] Reading last deployment info")
    record = load_application_info().get('last_deployment', {})

    if record:
        logger.info(f"[GetLastDeploymentInfo] Found last deployment: {record}")
        return {
            'message': 'Successfully retrieved last deployment information',
            'info': record
        }

    return {
        'message': 'No information found about the last deployment',
        'info': {}
    }
365
+
366
+
367
@_append_tool
def OOS_GetDeployStatus(
        name: str = Field(description='name of the application'),
        application_group_name: str = Field(description='name of the application group'),
):
    """
    Query the deployment status of an application group.

    Returns whatever _list_application_group_deployment yields for END_STATUSES:
    the application group response body once the group's status is one of
    END_STATUSES, otherwise None.
    """
    logger.info(f"[GetDeployStatus] Input parameters: name={name}, application_group_name={application_group_name}")
    oos_client = create_client(region_id=APPLICATION_MANAGEMENT_REGION_ID)
    result = _list_application_group_deployment(oos_client, name, application_group_name, END_STATUSES)
    serialized = json.dumps(str(result), ensure_ascii=False)
    logger.info(f"[GetDeployStatus] Response: {serialized}")
    return result
380
+
381
+
382
def _handle_new_application_group(client, name, application_group_name, deploy_region_id,
                                  region_id_oss, bucket_name, object_name, version_id,
                                  is_internal_oss, port, instance_ids, application_start,
                                  application_stop, deploy_language):
    """
    Create a not-yet-existing application group and build its deploy request.

    Steps: create the group through ``client``, make sure every instance carries
    the application tag, then wrap the JSON-encoded deploy parameters in a
    DeployApplicationGroupRequest.

    Returns:
        The DeployApplicationGroupRequest; it is not sent by this function.
    """
    logger.info(f"[code_deploy] Application group '{application_group_name}' does not exist, creating it...")
    group_request = oos_20190601_models.CreateApplicationGroupRequest(
        name=application_group_name,
        application_name=name,
        deploy_region_id=deploy_region_id,
        region_id=APPLICATION_MANAGEMENT_REGION_ID,
    )
    client.create_application_group(group_request)
    logger.info(f"[code_deploy] Application group '{application_group_name}' created successfully")

    # Make sure every instance is tagged (including the first one)
    _ensure_instances_tagged(deploy_region_id, name, application_group_name, instance_ids)

    serialized_parameters = json.dumps(
        _create_deploy_parameters(
            name, application_group_name, region_id_oss, bucket_name,
            object_name, version_id, is_internal_oss, port, instance_ids,
            application_start, application_stop, deploy_language
        )
    )

    return oos_20190601_models.DeployApplicationGroupRequest(
        name=application_group_name,
        application_name=name,
        region_id=APPLICATION_MANAGEMENT_REGION_ID,
        deploy_parameters=serialized_parameters,
    )
411
+
412
+
413
def _handle_existing_application_group(name, application_group_name, deploy_region_id, region_id_oss, bucket_name,
                                       object_name, version_id, application_start, application_stop, instance_ids):
    """
    Build the deploy request for an application group that already exists.

    Steps: make sure every instance carries the application tag, create a new
    deploy revision pointing at the uploaded OSS object (CreateDeployRevision),
    then build a DeployApplicationGroupRequest that rolls that revision out.

    Returns:
        The DeployApplicationGroupRequest; it is not sent by this function.

    Raises:
        RuntimeError: if the CreateDeployRevision response carries no RevisionId.
            Without this check the literal string 'None' would be sent as the
            revision id and the failure would only surface later, in the deploy call.
    """
    logger.info(f"[code_deploy] Application group '{application_group_name}' already exists, skipping creation")

    # Make sure every instance is tagged (the application group already exists)
    _ensure_instances_tagged(deploy_region_id, name, application_group_name, instance_ids)

    location_hooks = _create_location_and_hooks(
        region_id_oss, bucket_name, object_name, version_id,
        deploy_region_id, application_start, application_stop
    )

    # Location and Hooks are passed as JSON strings, not nested objects.
    create_deploy_parameters = {
        'ApplicationName': name,
        'Description': '',
        'RevisionType': 'Oss',
        'Location': json.dumps(location_hooks["location"]),
        'Hooks': json.dumps(location_hooks["hooks"])
    }

    create_deploy_revision_response = _tools_api_call(
        'oos',
        'CreateDeployRevision',
        create_deploy_parameters,
        ctx=None
    )
    logger.info(f"[code_deploy] create_deploy_revision_response {create_deploy_revision_response}")

    # Tolerate missing or None intermediate levels, but refuse to continue without an id.
    response_body = (create_deploy_revision_response or {}).get('body') or {}
    raw_revision_id = (response_body.get('Revision') or {}).get('RevisionId')
    if raw_revision_id is None or raw_revision_id == '':
        raise RuntimeError(
            f"CreateDeployRevision returned no RevisionId for application '{name}': "
            f"{create_deploy_revision_response}"
        )
    revision_id = str(raw_revision_id)

    # The parameters are JSON-encoded at each nesting level, matching the
    # string-in-string layout of the original request.
    start_execution_parameters = json.dumps({
        "Parameters": json.dumps({
            "applicationName": name,
            "applicationGroupName": application_group_name,
            "deployRevisionId": revision_id,
            "deployMethod": "all",
            "batchNumber": 2,
            "batchPauseOption": "Automatic"
        }),
        "Mode": "FailurePause"
    })
    deploy_parameters = json.dumps({
        "StartExecutionParameters": start_execution_parameters
    })
    logger.info(f"[code_deploy] deploy_parameters {deploy_parameters}")
    return oos_20190601_models.DeployApplicationGroupRequest(
        region_id=APPLICATION_MANAGEMENT_REGION_ID,
        application_name=name,
        name=application_group_name,
        deploy_parameters=deploy_parameters,
        revision_id=revision_id
    )
464
+
465
+
466
def _describe_instances_with_retry(deploy_region_id: str, describe_instances_request):
    """
    Call ECS describe_instances, retrying a specific transient failure.

    Only an exception that looks like an UnretryableException (by type name or
    message) AND mentions "bad file descriptor" (case-insensitive) is retried.
    Up to 3 attempts are made, sleeping 1s and then 2s between them. Any other
    exception, or a failure on the last attempt, is logged and re-raised.

    Args:
        deploy_region_id: Region ID used to create the ECS client.
        describe_instances_request: DescribeInstancesRequest object to send.

    Returns:
        The describe_instances response object.

    Raises:
        Exception: whatever the failing call raised, unchanged.
    """
    max_retries = 3

    for attempt in range(max_retries):
        try:
            # A new client is built for every attempt.
            # NOTE(review): presumably so a retry does not reuse the connection
            # that produced "Bad file descriptor" - confirm.
            ecs_client = create_ecs_client(region_id=deploy_region_id)
            return ecs_client.describe_instances(describe_instances_request)
        except Exception as e:
            error_msg = str(e)
            is_unretryable = 'UnretryableException' in type(e).__name__ or 'UnretryableException' in error_msg
            has_bad_fd = 'bad file descriptor' in error_msg.lower()
            is_last_attempt = attempt == max_retries - 1

            if not (is_unretryable and has_bad_fd) or is_last_attempt:
                # Not a retryable error, or the retries are used up: re-raise as is.
                logger.error(f"[_describe_instances_with_retry] Error calling describe_instances: {e}")
                raise

            wait_time = attempt + 1  # linear back-off: 1s, then 2s
            logger.warning(f"[_describe_instances_with_retry] UnretryableException with Bad file descriptor (attempt {attempt + 1}/{max_retries}), retrying after {wait_time}s: {e}")
            time.sleep(wait_time)
510
+
511
+
512
def _check_ecs_instances_exist(deploy_region_id: str, instance_ids: list) -> Tuple[bool, list]:
    """
    Check whether the given ECS instances exist in ``deploy_region_id``.

    DescribeInstances returns only 10 instances per page by default and accepts
    at most 100 instance IDs per call. The IDs are therefore queried in batches
    of 100 with page_size=100, so that more than 10 instances are not falsely
    reported as missing.

    Args:
        deploy_region_id: Region to look the instances up in.
        instance_ids: Instance IDs to verify; an empty or None value counts as
            "all exist".

    Returns:
        (all_exist, missing_instance_ids): (True, []) when every instance was
        found, otherwise (False, [IDs not found, in input order]).
    """
    if not instance_ids:
        return True, []

    batch_size = 100  # DescribeInstances limit for both InstanceIds and PageSize
    existing_instance_ids = set()
    for start in range(0, len(instance_ids), batch_size):
        batch = instance_ids[start:start + batch_size]
        describe_instances_request = ecs_20140526_models.DescribeInstancesRequest(
            region_id=deploy_region_id,
            instance_ids=json.dumps(batch),
            page_size=batch_size
        )

        response = _describe_instances_with_retry(deploy_region_id, describe_instances_request)

        # Guard every level: an empty result may leave the instance list unset.
        if response.body and response.body.instances and response.body.instances.instance:
            for instance in response.body.instances.instance:
                if instance.instance_id:
                    existing_instance_ids.add(instance.instance_id)

    missing_instance_ids = [inst_id for inst_id in instance_ids if inst_id not in existing_instance_ids]

    if missing_instance_ids:
        logger.warning(f"[_check_ecs_instances_exist] Missing instances: {missing_instance_ids}")
        return False, missing_instance_ids

    logger.info(f"[_check_ecs_instances_exist] All instances exist: {instance_ids}")
    return True, []
543
+
544
+
545
def _check_instance_has_tag(deploy_region_id: str, instance_id: str, tag_key: str, tag_value: str) -> bool:
    """
    Tell whether an ECS instance already carries the tag ``tag_key=tag_value``.

    Best effort: any exception raised while querying is logged as a warning and
    treated as "tag not present", so the caller simply goes on to tag the instance.

    Returns:
        bool: True if the first instance in the response has the exact key/value
        pair, False otherwise (including lookup failures).
    """
    request = ecs_20140526_models.DescribeInstancesRequest(
        region_id=deploy_region_id,
        instance_ids=json.dumps([instance_id])
    )

    try:
        response = _describe_instances_with_retry(deploy_region_id, request)
        instances = response.body and response.body.instances and response.body.instances.instance
        if instances:
            tags = instances[0].tags
            tag_list = tags and tags.tag
            if tag_list and any(t.tag_key == tag_key and t.tag_value == tag_value for t in tag_list):
                logger.info(f"[_check_instance_has_tag] Instance {instance_id} already has tag {tag_key}={tag_value}")
                return True
        logger.info(f"[_check_instance_has_tag] Instance {instance_id} does not have tag {tag_key}={tag_value}")
        return False
    except Exception as e:
        # If the lookup fails, assume the tag is absent and let the caller tag the instance
        logger.warning(f"[_check_instance_has_tag] Error checking tag for instance {instance_id}: {e}")
        return False
572
+
573
+
574
def _ensure_instances_tagged(deploy_region_id: str, name: str, application_group_name: str, instance_ids: list):
    """
    Make sure every given ECS instance carries the application tag.

    The tag is ``app-<name>=<application_group_name>``. Instances that already
    have it are skipped; the rest are tagged through TagResources. TagResources
    accepts at most 50 resource IDs per call, so the instances are sent in
    batches of 50 instead of in one request that would be rejected for larger
    lists.

    Args:
        deploy_region_id: Region of the instances.
        name: Application name, used to build the tag key.
        application_group_name: Application group name, used as the tag value.
        instance_ids: Instance IDs to check; nothing happens when empty or None.
    """
    if not instance_ids:
        return

    tag_key = f'app-{name}'
    tag_value = application_group_name

    # Collect the instances that still need the tag
    instances_to_tag = [
        instance_id for instance_id in instance_ids
        if not _check_instance_has_tag(deploy_region_id, instance_id, tag_key, tag_value)
    ]

    if not instances_to_tag:
        logger.info(f"[_ensure_instances_tagged] All instances already have tag {tag_key}={tag_value}")
        return

    # Tag the instances that need it
    logger.info(f"[_ensure_instances_tagged] Tagging instances: {instances_to_tag}")
    ecs_client = create_ecs_client(region_id=deploy_region_id)
    max_resources_per_call = 50  # TagResources limit on ResourceId.N
    for start in range(0, len(instances_to_tag), max_resources_per_call):
        tag_resources_request = ecs_20140526_models.TagResourcesRequest(
            region_id=deploy_region_id,
            resource_type='Instance',
            resource_id=instances_to_tag[start:start + max_resources_per_call],
            tag=[ecs_20140526_models.TagResourcesRequestTag(
                key=tag_key,
                value=tag_value
            )]
        )
        ecs_client.tag_resources(tag_resources_request)
    logger.info(f"[_ensure_instances_tagged] Successfully tagged instances: {instances_to_tag}")
609
+
610
+
611
def _tag_multiple_instances(deploy_region_id, name, application_group_name, instance_ids):
    """
    Tag every instance except the first one (deprecated; use _ensure_instances_tagged instead).
    """
    tail = instance_ids[1:]
    if not tail:
        return
    _ensure_instances_tagged(deploy_region_id, name, application_group_name, tail)
618
+
619
+
620
def _list_application_group_deployment(client, name, application_group_name, status_list):
    """
    Fetch an application group and return it only if its status is in ``status_list``.

    Returns:
        The get_application_group response body when the group's status is one
        of ``status_list``; None otherwise.
    """
    request = oos_20190601_models.GetApplicationGroupRequest(
        name=application_group_name,
        application_name=name,
        region_id=APPLICATION_MANAGEMENT_REGION_ID,
    )
    body = client.get_application_group(request).body
    return body if body.application_group.status in status_list else None
634
+
635
+
636
def _check_application_exists(client: oos20190601Client, name: str) -> bool:
    """
    Tell whether the application ``name`` exists.

    Returns True when get_application succeeds and False when the raised
    exception has ``code == 'EntityNotExists.Application'``. Any other
    exception is logged as a warning and re-raised.
    """
    try:
        client.get_application(
            oos_20190601_models.GetApplicationRequest(
                name=name,
                region_id=APPLICATION_MANAGEMENT_REGION_ID,
            )
        )
    except Exception as e:
        if getattr(e, 'code', None) == 'EntityNotExists.Application':
            return False
        logger.warning(f"[_check_application_exists] Error checking application {name}: {e}")
        raise
    return True
650
+
651
+
652
def _check_application_group_exists(client: oos20190601Client, application_name: str, group_name: str) -> bool:
    """
    Tell whether the application group ``application_name``/``group_name`` exists.

    Returns True when get_application_group succeeds and False when the raised
    exception has ``code == 'EntityNotExists.ApplicationGroup'``. Any other
    exception is logged as a warning and re-raised.
    """
    try:
        client.get_application_group(
            oos_20190601_models.GetApplicationGroupRequest(
                name=group_name,
                application_name=application_name,
                region_id=APPLICATION_MANAGEMENT_REGION_ID,
            )
        )
    except Exception as e:
        if getattr(e, 'code', None) == 'EntityNotExists.ApplicationGroup':
            return False
        logger.warning(
            f"[_check_application_group_exists] Error checking application group {application_name}/{group_name}: {e}")
        raise
    return True
668
+
669
+
670
+ def _create_deploy_parameters(name, application_group_name, region_id_oss, bucket_name, object_name, version_id,
671
+ is_internal_oss, port, instance_ids, application_start, application_stop, deploy_language):
672
+ """
673
+ Create deployment parameters
674
+ """
675
+ PACKAGE_MAP = {
676
+ 'docker': 'ACS-Extension-DockerCE-1853370294850618',
677
+ 'java': 'ACS-Extension-java-1853370294850618',
678
+ 'python': 'ACS-Extension-python-1853370294850618',
679
+ 'nodejs': 'ACS-Extension-node-1853370294850618',
680
+ 'golang': 'ACS-Extension-golang-1853370294850618',
681
+ 'nginx': 'ACS-Extension-nginx-1853370294850618',
682
+ 'git': 'ACS-Extension-Git-1853370294850618',
683
+ }
684
+ package_name = PACKAGE_MAP.get(deploy_language, PACKAGE_MAP['docker'])
685
+
686
+ return {
687
+ "Parameters": {
688
+ "CreateEcsOption": "ExistECS" if instance_ids else "NewECS",
689
+ "InstanceId": instance_ids[0] if instance_ids else None,
690
+ "ApplicationName": name,
691
+ "Description": "",
692
+ "ZoneId": "cn-hangzhou-b",
693
+ "Port": port,
694
+ "RevisionType": "Oss",
695
+ "RegionIdOSS": region_id_oss,
696
+ "BucketName": bucket_name,
697
+ "ObjectName": object_name,
698
+ "VersionId": version_id,
699
+ "IsInternalOSS": is_internal_oss,
700
+ "ApplicationGroupName": application_group_name,
701
+ "WorkingDir": "/root",
702
+ "ApplicationStart": application_start,
703
+ "ApplicationStop": application_stop,
704
+ "PackageName": package_name
705
+ },
706
+ "TemplateName": "oss-revision",
707
+ "ServiceId": "service-af8acc2d6f4044f4b5ea"
708
+ }
709
+
710
+
711
+ def _create_location_and_hooks(region_id_oss, bucket_name, object_name, version_id, deploy_region_id,
712
+ application_start, application_stop):
713
+ """
714
+ Create location and hook configuration
715
+ """
716
+ return {
717
+ "location": {
718
+ "regionId": region_id_oss,
719
+ "bucketName": bucket_name,
720
+ "objectName": object_name,
721
+ "versionId": version_id,
722
+ "isInternal": "true" if region_id_oss == deploy_region_id else "false"
723
+ },
724
+ "hooks": {
725
+ "workingDir": "/root",
726
+ "applicationStart": application_start,
727
+ "applicationStop": application_stop
728
+ }
729
+ }
730
+
731
+
732
+ def _create_revision_deploy_parameters():
733
+ """
734
+ Create revised deployment parameters
735
+ """
736
+ return {
737
+ "StartExecutionParameters": {
738
+ "Parameters": {
739
+ "applicationName": "",
740
+ "applicationGroupName": "",
741
+ "deployRevisionId": "",
742
+ "deployMethod": "all",
743
+ "batchNumber": 2,
744
+ "batchPauseOption": "Automatic"
745
+ },
746
+ "Mode": "FailurePause"
747
+ }
748
+ }