@comate/zulu 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/comate-engine/server.js +21 -21
- package/dist/bundle/index.js +2 -2
- package/package.json +1 -1
- package/comate-engine/assets/skills/auto-commit/SKILL.md +0 -436
- package/comate-engine/assets/skills/auto-commit/references/issue_type_mapping.json +0 -19
- package/comate-engine/assets/skills/auto-commit/references/new_version_instruction.md +0 -196
- package/comate-engine/assets/skills/auto-commit/references/old_version_instruction.md +0 -189
- package/comate-engine/assets/skills/auto-commit/references/query_reference.md +0 -176
- package/comate-engine/assets/skills/auto-commit/scripts/compat.py +0 -86
- package/comate-engine/assets/skills/auto-commit/scripts/create_card_cli.py +0 -67
- package/comate-engine/assets/skills/auto-commit/scripts/git_diff_cli.py +0 -196
- package/comate-engine/assets/skills/auto-commit/scripts/git_utils.py +0 -230
- package/comate-engine/assets/skills/auto-commit/scripts/icafe/__init__.py +0 -66
- package/comate-engine/assets/skills/auto-commit/scripts/icafe/client.py +0 -473
- package/comate-engine/assets/skills/auto-commit/scripts/icafe/farseer.py +0 -52
- package/comate-engine/assets/skills/auto-commit/scripts/icafe/matching.py +0 -781
- package/comate-engine/assets/skills/auto-commit/scripts/logger.py +0 -32
- package/comate-engine/assets/skills/auto-commit/scripts/match_card_cli.py +0 -37
- package/comate-engine/assets/skills/auto-commit/scripts/recognize_card_cli.py +0 -63
- package/comate-engine/assets/skills/auto-commit-comate/references/new_version_instruction.md +0 -209
- package/comate-engine/assets/skills/auto-commit-comate/references/old_version_instruction.md +0 -208
- package/comate-engine/assets/skills/auto-commit-comate/scripts/compat.py +0 -86
- package/comate-engine/assets/skills/build-web-page-comate/SKILL.md +0 -160
- package/comate-engine/assets/skills/build-web-page-comate/setup-html-scaffold.md +0 -49
- package/comate-engine/assets/skills/build-web-page-comate/setup-react-scaffold.md +0 -103
- package/comate-engine/assets/skills/build-web-page-comate/work-with-user-intent.md +0 -112
- package/comate-engine/assets/skills/code-security/SKILL.md +0 -176
- package/comate-engine/assets/skills/code-security/references/credential_hosting.md +0 -102
- package/comate-engine/assets/skills/code-security/references/vul_repair_sensitive.md +0 -219
- package/comate-engine/assets/skills/code-security/scripts/build_repair_info.py +0 -0
- package/comate-engine/assets/skills/code-security/scripts/credential_hosting.py +0 -99
- package/comate-engine/assets/skills/code-security/scripts/credential_poll.py +0 -350
- package/comate-engine/assets/skills/code-security/scripts/http_client.py +0 -173
- package/comate-engine/assets/skills/code-security/scripts/parse_scan_result.py +0 -301
- package/comate-engine/assets/skills/code-security/scripts/repair_vulnerability.py +0 -261
- package/comate-engine/assets/skills/code-security/scripts/report_chat.py +0 -198
- package/comate-engine/assets/skills/code-security/scripts/scan_vulnerability.py +0 -316
- package/comate-engine/assets/skills/comate-docs-comate/references/query_content.md +0 -83
- package/comate-engine/assets/skills/comate-docs-comate/references/query_repo.md +0 -57
- package/comate-engine/assets/skills/comate-docs-comate/scripts/ku_operator.py +0 -1575
- package/comate-engine/assets/skills/create-skill-comate/references/output-patterns.md +0 -82
- package/comate-engine/assets/skills/create-skill-comate/references/workflows.md +0 -28
- package/comate-engine/assets/skills/create-skill-comate/scripts/init_skill.py +0 -308
- package/comate-engine/node_modules/@comate/plugin-host/dist/index-B8VdZIx4.js +0 -1
- package/comate-engine/node_modules/@comate/plugin-host/dist/index-QEN4ay0E.js +0 -1
- package/comate-engine/node_modules/@comate/plugin-host/dist/user-DAIE9qbz.js +0 -44
- package/comate-engine/node_modules/@comate/plugin-host/dist/user-vP8ulngb.js +0 -44
|
@@ -1,198 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
对话结果上报工具 - 扫描完成后向服务端上报本次对话信息。
|
|
4
|
-
|
|
5
|
-
用法:
|
|
6
|
-
python3 report_chat.py --username <用户名> --chat-id <对话ID> --scan-result <扫描结果JSON文件> [--status <状态码>] [--err-message <错误信息>] [--git-url <仓库URL>] [--git-branch <分支>] [--ide <IDE名称>] [--query <用户输入>]
|
|
7
|
-
|
|
8
|
-
接口: POST /api/v1/chats
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
import argparse
|
|
12
|
-
import hashlib
|
|
13
|
-
import json
|
|
14
|
-
import logging
|
|
15
|
-
import os
|
|
16
|
-
import subprocess
|
|
17
|
-
import sys
|
|
18
|
-
import uuid
|
|
19
|
-
|
|
20
|
-
import http_client # noqa: F401 (triggers shared logging config)
|
|
21
|
-
|
|
22
|
-
HOST = "https://comate-sec.baidu-int.com"
|
|
23
|
-
|
|
24
|
-
USERNAME = ""
|
|
25
|
-
USER_ID = ""
|
|
26
|
-
|
|
27
|
-
logger = logging.getLogger("report")
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def build_headers(chat_id):
    # type: (str) -> dict
    """Build the HTTP headers sent with a chat-report request.

    Args:
        chat_id: Conversation identifier, sent as ``SAST-Chat-ID``.

    Returns:
        A dict holding the module-level ``USERNAME`` and ``USER_ID`` values,
        a freshly generated UUID4 request id, and the given chat id.
    """
    headers = {}
    headers["Comate-Username"] = USERNAME
    headers["Comate-User-Id"] = USER_ID
    # A new request id is generated on every call.
    headers["SAST-Request-ID"] = str(uuid.uuid4())
    headers["SAST-Chat-ID"] = chat_id
    return headers
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def calc_file_sha256(file_path):
    # type: (str) -> str
    """Return the hex SHA-256 digest of a file, or "" when it cannot be read.

    The file is consumed in 4096-byte chunks. Any exception raised while
    opening or reading the file is swallowed and reported as an empty string,
    so callers always get a best-effort value.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The hexadecimal digest, or an empty string on any failure.
    """
    digest = hashlib.sha256()
    try:
        with open(file_path, "rb") as stream:
            block = stream.read(4096)
            while block != b"":
                digest.update(block)
                block = stream.read(4096)
        return digest.hexdigest()
    except Exception:
        return ""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def build_vuls_from_scan_result(scan_result, root_path=""):
    # type: (dict, str) -> list
    """Extract vulnerabilities from a scan result and group them by file.

    Two layouts are accepted for the run list (``data.sarif.runs`` or
    ``data.runs``) and two names for the per-run result list (``results``,
    the standard SARIF key, or ``result``). Keys that are present but hold
    JSON ``null`` are treated the same as missing keys, so a failed scan
    whose payload is ``{"data": null}`` yields an empty list instead of
    raising ``AttributeError``.

    Args:
        scan_result: Parsed scan-result JSON.
        root_path: Optional project root. When given, it is used to hash files
            on disk whose hash is not listed in ``data.bundleFiles``.

    Returns:
        A list with one entry per file, in first-seen order, each shaped as
        ``{"fileName": ..., "fileHash": ..., "vuls": [{"vulHash": ...}, ...]}``.
        Results without a ``properties.hash`` and locations without an
        artifact URI are skipped.
    """
    data = scan_result.get("data") or {}
    # Accept both layouts: data.sarif.runs or data.runs.
    runs = (data.get("sarif") or {}).get("runs") or data.get("runs") or []
    if not runs:
        return []

    # Aggregate vulnerabilities per file path.
    file_vuls = {}  # type: dict
    for run in runs:
        # Accept "results" (standard SARIF) and "result" (seen from the server).
        results = run.get("results") or run.get("result") or []
        for result in results:
            vul_hash = (result.get("properties") or {}).get("hash", "")
            if not vul_hash:
                continue
            for loc in result.get("locations") or []:
                physical = loc.get("physicalLocation") or {}
                file_path = (physical.get("artifactLocation") or {}).get("uri", "")
                if not file_path:
                    continue
                entry = file_vuls.setdefault(
                    file_path,
                    {"fileName": file_path, "fileHash": "", "vuls": []},
                )
                entry["vuls"].append({"vulHash": vul_hash})

    # Fill in fileHash: prefer the hash recorded in the bundle, otherwise
    # hash the file on disk when a project root is available.
    bundle_files = data.get("bundleFiles") or {}
    for file_path, file_info in file_vuls.items():
        fh = bundle_files.get(file_path, "")
        if not fh and root_path:
            fh = calc_file_sha256(os.path.join(root_path, file_path))
        file_info["fileHash"] = fh

    return list(file_vuls.values())
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def get_git_info(root_path):
    # type: (str) -> dict
    """Read the git remote URL and current branch of a project directory.

    Args:
        root_path: Directory in which the git commands are run. An empty value
            short-circuits and returns empty fields.

    Returns:
        ``{"url": ..., "branch": ...}``. A field stays an empty string when
        its git command raises for any reason.
    """
    info = {"url": "", "branch": ""}
    if not root_path:
        return info

    queries = (
        ("url", ["git", "remote", "get-url", "origin"]),
        ("branch", ["git", "rev-parse", "--abbrev-ref", "HEAD"]),
    )
    for field, command in queries:
        try:
            raw = subprocess.check_output(
                command, cwd=root_path, stderr=subprocess.DEVNULL
            )
            info[field] = raw.decode("utf-8").strip()
        except Exception:
            # Best effort: a failing lookup leaves this field empty.
            continue
    return info
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def report_chat(chat_id, scan_result, status, err_message, git_url, git_branch, ide, query, root_path=""):
    # type: (str, dict, int, str, str, str, str, str, str) -> dict
    """Report the outcome of one conversation to the server.

    Args:
        chat_id: Conversation identifier, also sent in the request headers.
        scan_result: Parsed scan-result JSON used to build the ``vuls`` list.
        status: Execution status code forwarded as ``status``.
        err_message: Error text forwarded as ``errMessage``.
        git_url: Repository URL; looked up from ``root_path`` when empty.
        git_branch: Branch name; looked up from ``root_path`` when empty.
        ide: IDE name forwarded as ``ide``.
        query: User query text forwarded as ``query``.
        root_path: Optional project root used for the git lookup and for
            hashing files.

    Returns:
        Whatever ``http_client.post`` returns for ``POST /api/v1/chats``.
    """
    # When git_url/git_branch are empty, detect them from root_path.
    git_details_missing = not git_url or not git_branch
    if root_path and git_details_missing:
        detected = get_git_info(root_path)
        git_url = git_url or detected["url"]
        git_branch = git_branch or detected["branch"]

    vuls = build_vuls_from_scan_result(scan_result, root_path)

    git_payload = {"url": git_url, "branch": git_branch}
    body = {
        "type": 1,
        "gitInfo": git_payload,
        "ide": ide,
        "query": query,
        "status": status,
        "errMessage": err_message,
        "vuls": vuls,
    }

    headers = build_headers(chat_id)
    logger.info("report_chat: chat_id=%s, vuls_count=%d, git_url=%s, git_branch=%s",
                chat_id, len(vuls), git_url, git_branch)
    endpoint = "{}/api/v1/chats".format(HOST)
    return http_client.post(endpoint, headers=headers, json_body=body)
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def main():
    """Command-line entry point: parse arguments and report one conversation.

    Sets the module-level ``USERNAME`` and ``USER_ID`` (the first 12 hex
    characters of the MD5 of the username), loads the scan-result JSON file,
    sends the report and prints the server response as JSON on stdout.

    A scan-result file that cannot be read only produces a warning on stderr;
    the report is then sent with an empty scan result. A failure while
    reporting prints an error on stderr and exits with status 1.
    """
    global USERNAME, USER_ID

    parser = argparse.ArgumentParser(description="对话结果上报工具")
    add = parser.add_argument
    add("--username", required=True, help="Comate 用户名")
    add("--chat-id", required=True, help="对话唯一标识 (COMATE_SESSION_ID)")
    add("--scan-result", required=True, help="扫描结果 JSON 文件路径")
    add("--status", type=int, default=0, help="执行状态码,0=成功,1=失败 (默认 0)")
    add("--err-message", default="", help="错误信息 (默认空)")
    add("--root-path", default="", help="项目根目录,用于计算文件哈希")
    add("--git-url", default="", help="Git 仓库 URL")
    add("--git-branch", default="", help="Git 分支")
    add("--ide", default="", help="IDE 名称")
    add("--query", default="", help="用户输入的查询文本")
    args = parser.parse_args()

    USERNAME = args.username
    USER_ID = hashlib.md5(USERNAME.encode("utf-8")).hexdigest()[:12]

    logger.info("report_chat start: chat_id=%s, username=%s, scan_result=%s",
                args.chat_id, USERNAME, args.scan_result)

    # Load the scan result; fall back to an empty dict when it is unreadable.
    scan_result = {}
    try:
        with open(args.scan_result, "r", encoding="utf-8") as result_file:
            scan_result = json.load(result_file)
    except Exception as read_error:
        print("警告: 读取扫描结果失败 {}: {}".format(args.scan_result, read_error), file=sys.stderr)

    report_kwargs = {
        "chat_id": args.chat_id,
        "scan_result": scan_result,
        "status": args.status,
        "err_message": args.err_message,
        "git_url": args.git_url,
        "git_branch": args.git_branch,
        "ide": args.ide,
        "query": args.query,
        "root_path": args.root_path,
    }
    try:
        response = report_chat(**report_kwargs)
        print(json.dumps(response, ensure_ascii=False))
    except Exception as report_error:
        print("上报失败: {}".format(report_error), file=sys.stderr)
        sys.exit(1)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if __name__ == "__main__":
|
|
198
|
-
main()
|
|
@@ -1,316 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
漏洞扫描工具 - 将项目代码上传至 Comate 安全服务端进行 SAST 扫描。
|
|
4
|
-
|
|
5
|
-
用法:
|
|
6
|
-
python3 scan_vulnerability.py --root-path <项目目录> --username <用户名>
|
|
7
|
-
|
|
8
|
-
流程:
|
|
9
|
-
1. 获取扫描配置(支持的文件类型)
|
|
10
|
-
2. 遍历项目目录,收集文件哈希
|
|
11
|
-
3. 创建 bundle 并上传缺失文件
|
|
12
|
-
4. 发起扫描并轮询结果
|
|
13
|
-
5. 输出 SARIF 格式漏洞报告
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
import argparse
|
|
17
|
-
import base64
|
|
18
|
-
import hashlib
|
|
19
|
-
import json
|
|
20
|
-
import logging
|
|
21
|
-
import os
|
|
22
|
-
import re
|
|
23
|
-
import sys
|
|
24
|
-
import time
|
|
25
|
-
import uuid
|
|
26
|
-
from typing import Any, Dict, List, Optional, Tuple
|
|
27
|
-
|
|
28
|
-
import http_client # noqa: F401 (triggers shared logging config)
|
|
29
|
-
|
|
30
|
-
HOST = "https://comate-sec.baidu-int.com"
|
|
31
|
-
|
|
32
|
-
USERNAME = ""
|
|
33
|
-
USER_ID = ""
|
|
34
|
-
|
|
35
|
-
logger = logging.getLogger("scan")
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def build_headers(chat_id=""):
    # type: (str) -> Dict[str, str]
    """Build the HTTP headers for a request to the scan service.

    Args:
        chat_id: Optional conversation identifier. ``SAST-Chat-ID`` is only
            included when this is non-empty.

    Returns:
        A dict holding the module-level ``USERNAME`` and ``USER_ID`` values
        and a freshly generated UUID4 request id, plus the chat id if given.
    """
    pairs = [
        ("Comate-Username", USERNAME),
        ("Comate-User-Id", USER_ID),
        ("SAST-Request-ID", str(uuid.uuid4())),
    ]
    if chat_id:
        pairs.append(("SAST-Chat-ID", chat_id))
    return dict(pairs)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def calc_sha256(file_path):
    # type: (str) -> str
    """Return the hex SHA-256 digest of a file, read in 4096-byte chunks.

    Errors from opening or reading the file are not handled here and
    propagate to the caller.
    """
    hasher = hashlib.sha256()
    with open(file_path, "rb") as stream:
        piece = stream.read(4096)
        while piece != b"":
            hasher.update(piece)
            piece = stream.read(4096)
    return hasher.hexdigest()
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def parse_gitignore(gitignore_path):
    # type: (str) -> List[Any]
    """Parse a .gitignore file into a list of compiled regular expressions.

    Each non-blank, non-comment line is translated with a simple textual
    mapping: ``.`` becomes a literal dot, ``*`` becomes ``.*`` and ``?``
    becomes ``.``. A leading ``/`` anchors the pattern at the start of the
    path, and a trailing ``/`` matches the directory itself or anything
    below it. This is an approximation of gitignore semantics, not a full
    implementation.

    A line whose translation is not a valid regular expression (for example
    one with an unbalanced ``(``) is reported on stderr and skipped, rather
    than letting ``re.error`` abort the whole scan.

    Args:
        gitignore_path: Path of the .gitignore file.

    Returns:
        The compiled patterns in file order; an empty list when the file does
        not exist.
    """
    patterns = []
    try:
        with open(gitignore_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                # Order matters: escape literal dots before "*" introduces ".*".
                regex = line.replace(".", r"\.").replace("*", ".*").replace("?", ".")
                if line.startswith("/"):
                    regex = "^" + regex[1:]
                if line.endswith("/"):
                    regex = regex[:-1] + r"($|/.*)"
                try:
                    patterns.append(re.compile(regex))
                except re.error as e:
                    # The naive translation leaves other regex metacharacters
                    # untouched, so some valid gitignore lines do not compile.
                    print("警告: 忽略无法解析的 .gitignore 规则 {}: {}".format(line, e), file=sys.stderr)
    except FileNotFoundError:
        pass
    return patterns
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def is_ignored(path, gitignore_patterns, root_dir):
    # type: (str, List[Any], str) -> bool
    """Return True when ``path`` matches any of the ignore patterns.

    Patterns are matched only against the path relative to ``root_dir``,
    with separators normalised to ``/``. Matching the absolute path as well
    would let a pattern such as ``build`` hit a parent directory of the
    project (e.g. ``/tmp/build/proj``) and wrongly ignore every file in it.

    Args:
        path: Path of the file or directory to test.
        gitignore_patterns: Compiled regular expressions to search with.
        root_dir: Project root that the patterns are relative to.

    Returns:
        True if at least one pattern is found in the relative path.
    """
    rel_path = os.path.relpath(path, root_dir).replace(os.sep, "/")
    return any(pattern.search(rel_path) for pattern in gitignore_patterns)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def walk_dir(directory, extensions=None, gitignore_patterns=None):
    # type: (str, Optional[List[str]], Optional[List[Any]]) -> List[str]
    """Recursively collect files under a directory.

    Files whose name ends in ``.json`` (case-insensitive) are always skipped.
    When ``gitignore_patterns`` is given, ignored directories are pruned from
    the walk and ignored files are skipped. When ``extensions`` is given, only
    files whose lower-cased name ends with one of them are kept.

    Args:
        directory: Directory to walk; it is resolved with ``os.path.realpath``.
        extensions: Optional file-name suffixes to keep.
        gitignore_patterns: Optional compiled ignore patterns.

    Returns:
        Full paths (under the resolved directory) of the matching files.
    """
    collected = []
    base_dir = os.path.realpath(directory)
    for current, subdirs, names in os.walk(base_dir):
        if gitignore_patterns:
            # Prune in place so os.walk does not descend into ignored dirs.
            kept = []
            for sub in subdirs:
                if is_ignored(os.path.join(current, sub), gitignore_patterns, base_dir):
                    continue
                kept.append(sub)
            subdirs[:] = kept

        for name in names:
            lowered = name.lower()
            if lowered.endswith(".json"):
                continue
            candidate = os.path.join(current, name)
            if gitignore_patterns and is_ignored(candidate, gitignore_patterns, base_dir):
                continue
            if extensions and not any(lowered.endswith(ext.lower()) for ext in extensions):
                continue
            collected.append(candidate)
    return collected
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def read_file_content(file_path):
    # type: (str) -> str
    """Return a file's content as a string.

    The file is first read as UTF-8 text. If that raises
    ``UnicodeDecodeError``, the file is re-read as bytes and returned
    base64-encoded (as ASCII text).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as text_file:
            text = text_file.read()
    except UnicodeDecodeError:
        with open(file_path, "rb") as raw_file:
            raw = raw_file.read()
        return base64.b64encode(raw).decode("ascii")
    return text
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def get_settings():
    # type: () -> Dict
    """Fetch the scan configuration via ``GET /api/v2/analysis/settings``.

    Returns:
        Whatever ``http_client.get`` returns for the settings endpoint.
    """
    settings_url = "{}/api/v2/analysis/settings".format(HOST)
    request_headers = build_headers()
    return http_client.get(settings_url, headers=request_headers)
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def create_bundle(file_hashes):
    # type: (Dict[str, str]) -> Tuple[str, List[str]]
    """Create a scan bundle via ``POST /api/v1/bundle``.

    Two response layouts are handled: either ``data`` is the list of missing
    files with ``bundleHash`` at the top level, or ``data`` is an object
    holding ``bundleHash`` and ``missingFiles``.

    Args:
        file_hashes: Mapping sent as the JSON request body.

    Returns:
        ``(bundle_hash, missing_files)``; the defaults are ``""`` and ``[]``
        when the fields are absent.
    """
    response = http_client.post(
        "{}/api/v1/bundle".format(HOST),
        headers=build_headers(),
        json_body=file_hashes,
    )

    details = response.get("data", {})
    if isinstance(details, list):
        return response.get("bundleHash", ""), details

    bundle_hash = details.get("bundleHash", "")
    missing_files = details.get("missingFiles", [])
    return bundle_hash, missing_files
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def upload_files(bundle_hash, root_path, missing_files, file_hashes, upload_type="scan"):
    # type: (str, str, List[str], Dict[str, str], str) -> Tuple[str, List[str]]
    """Upload the content of missing files with a single PUT request.

    Files that cannot be read are reported on stderr and left out of the
    request. The target is ``/api/v1/bundle/<bundle_hash>`` when
    ``upload_type`` is ``"scan"`` and ``/api/v1/upload`` otherwise.

    Args:
        bundle_hash: Current bundle hash; also the fallback return value.
        root_path: Directory that the names in ``missing_files`` are joined to.
        missing_files: Names of the files to upload.
        file_hashes: Mapping from file name to hash, sent alongside the content.
        upload_type: Selects the endpoint, see above.

    Returns:
        ``(bundle_hash, remaining_missing_files)`` taken from the response,
        which may carry either a list or an object under ``data``.
    """
    files = {}
    for name in missing_files:
        file_path = os.path.join(root_path, name)
        try:
            content = read_file_content(file_path)
            files[name] = {"hash": file_hashes.get(name, ""), "content": content}
        except Exception as e:
            print("警告: 读取文件失败 {}: {}".format(file_path, e), file=sys.stderr)
    payload = {"files": files}

    url = (
        "{}/api/v1/bundle/{}".format(HOST, bundle_hash)
        if upload_type == "scan"
        else "{}/api/v1/upload".format(HOST)
    )

    response = http_client.put(url, headers=build_headers(), json_body=payload)

    details = response.get("data", {})
    if isinstance(details, list):
        return response.get("bundleHash", bundle_hash), details

    new_hash = details.get("bundleHash", bundle_hash)
    remaining = details.get("missingFiles", [])
    return new_hash, remaining
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def scan_init(root_path):
    # type: (str) -> Tuple[str, Dict[str, str]]
    """Prepare a scan: fetch settings, hash files, create the bundle, upload.

    Steps:
      1. Fetch the scan configuration and merge the ``sca`` and ``sast``
         ``supportedLanguages`` lists into one set of file-name suffixes.
      2. Parse ``<root_path>/.gitignore`` and walk the project, hashing every
         file that survives the filters. Files that fail to hash are reported
         on stderr and skipped.
      3. Create the bundle and upload the files the server reports as missing,
         repeating while the server still lists missing files.

    The upload loop stops when a round makes no progress, i.e. the server
    reports exactly the same missing files as before the upload. Without
    this guard a file that cannot be read (and is therefore never uploaded)
    would make the loop spin forever.

    Args:
        root_path: Project root directory.

    Returns:
        ``(bundle_hash, file_hashes)`` where ``file_hashes`` maps paths
        relative to ``root_path`` to their SHA-256 hex digest.
    """
    # Fetch the configuration.
    cfg = get_settings()
    scan_config = cfg.get("data", {}).get("scanConfiguration", {})
    extensions = list(
        set(scan_config.get("sca", {}).get("supportedLanguages", []))
        | set(scan_config.get("sast", {}).get("supportedLanguages", []))
    )

    # Parse .gitignore.
    gitignore_path = os.path.join(root_path, ".gitignore")
    gitignore_patterns = parse_gitignore(gitignore_path)

    # Walk the directory and collect file hashes.
    all_files = walk_dir(root_path, extensions, gitignore_patterns)
    file_hashes = {}  # type: Dict[str, str]
    for f in all_files:
        rel = os.path.relpath(f, root_path)
        try:
            file_hashes[rel] = calc_sha256(f)
        except Exception as e:
            print("警告: 计算哈希失败 {}: {}".format(f, e), file=sys.stderr)

    print("收集文件: {} 个".format(len(file_hashes)), file=sys.stderr)

    # Create the bundle.
    bundle_hash, missing_files = create_bundle(file_hashes)
    print("Bundle: {}, 待上传: {} 个文件".format(bundle_hash, len(missing_files)), file=sys.stderr)

    # Upload missing files until none remain or a round makes no progress.
    previous_missing = None
    while missing_files:
        current_missing = set(missing_files)
        if current_missing == previous_missing:
            # Same files still missing after an upload attempt: retrying
            # would loop forever, so give up on them.
            print("警告: 仍有 {} 个文件无法上传,已停止重试".format(len(missing_files)), file=sys.stderr)
            break
        previous_missing = current_missing
        bundle_hash, missing_files = upload_files(
            bundle_hash, root_path, missing_files, file_hashes, "scan"
        )
        if missing_files:
            print("剩余待上传: {} 个文件".format(len(missing_files)), file=sys.stderr)

    return bundle_hash, file_hashes
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
def get_git_info(root_path):
    # type: (str) -> Dict[str, str]
    """Read the git remote URL, branch and commit id of a project directory.

    Args:
        root_path: Directory in which the git commands are run.

    Returns:
        ``{"gitURL": ..., "gitBranch": ..., "gitCommitID": ...}``. A field
        stays an empty string when its git command raises for any reason.
    """
    import subprocess

    info = {"gitURL": "", "gitBranch": "", "gitCommitID": ""}
    lookups = (
        ("gitURL", ["git", "remote", "get-url", "origin"]),
        ("gitBranch", ["git", "rev-parse", "--abbrev-ref", "HEAD"]),
        ("gitCommitID", ["git", "rev-parse", "HEAD"]),
    )
    for field, command in lookups:
        try:
            raw = subprocess.check_output(
                command, cwd=root_path, stderr=subprocess.DEVNULL
            )
            info[field] = raw.decode("utf-8").strip()
        except Exception:
            # Best effort: a failing lookup leaves this field empty.
            continue
    return info
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def scan_vulnerability(root_path, chat_id=""):
    # type: (str, str) -> Tuple[Dict, str]
    """Run a vulnerability scan and return ``(result, bundle_hash)``.

    The project is first prepared with ``scan_init``. The analysis request is
    then POSTed to ``/api/v2/analysis`` repeatedly, sleeping 3 seconds between
    attempts, for as long as the response ``status`` equals 1. The first
    response with any other status is returned. There is no upper bound on
    the number of attempts.

    Args:
        root_path: Project root directory.
        chat_id: Optional conversation identifier, sent in the request headers.

    Returns:
        The final response from ``http_client.post`` and the bundle hash.
    """
    bundle_hash, file_hashes = scan_init(root_path)

    git_info = get_git_info(root_path)
    git_entry = {
        "path": "",
        "gitURL": git_info["gitURL"],
        "gitBranch": git_info["gitBranch"],
        "gitCommitID": git_info["gitCommitID"],
    }
    analysis_context = {
        "initiator": "",
        "trigger": "manual",
        "workspaceName": "",
        "workspacePath": "",
        "gitInfo": [git_entry],
    }
    scan_info = {
        "key": {"hash": bundle_hash, "type": "file"},
        "scan": 3,
        "analysisContext": analysis_context,
    }

    print("开始扫描...", file=sys.stderr)
    logger.info("scan start: chat_id=%s, bundle_hash=%s", chat_id, bundle_hash)

    analysis_url = "{}/api/v2/analysis".format(HOST)
    # Headers are rebuilt for every attempt so each request gets its own id.
    result = http_client.post(analysis_url, headers=build_headers(chat_id), json_body=scan_info)
    while result.get("status") == 1:
        print("扫描中,等待结果...", file=sys.stderr)
        time.sleep(3)
        result = http_client.post(analysis_url, headers=build_headers(chat_id), json_body=scan_info)
    return result, bundle_hash
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
def main():
    """Command-line entry point: scan a project and write the result to disk.

    Sets the module-level ``USERNAME`` and ``USER_ID`` (the first 12 hex
    characters of the MD5 of the username), validates the project directory,
    runs the scan, stores the bundle hash in the result under ``bundleHash``
    and writes everything to ``scan_result.json`` in the output directory.
    The path of that file is printed on stdout.

    Exits with status 1 when the project directory does not exist.
    """
    global USERNAME, USER_ID

    parser = argparse.ArgumentParser(description="代码安全漏洞扫描工具")
    add = parser.add_argument
    add("--root-path", required=True, help="待扫描项目根目录")
    add("--username", required=True, help="Comate 用户名")
    add("--chat-id", default="", help="对话唯一标识 (COMATE_SESSION_ID)")
    add("--output-dir", default=None, help="结果输出目录,默认为 skill 临时目录")
    args = parser.parse_args()

    USERNAME = args.username
    USER_ID = hashlib.md5(USERNAME.encode("utf-8")).hexdigest()[:12]

    logger.info("scan_vulnerability start: chat_id=%s, username=%s, root_path=%s",
                args.chat_id, USERNAME, args.root_path)

    root_path = os.path.realpath(args.root_path)
    if not os.path.isdir(root_path):
        print("错误: 目录不存在 {}".format(root_path), file=sys.stderr)
        sys.exit(1)

    # Default output goes to the skill's .tmp directory, in a sub-directory
    # named after the project plus a hash of its path, so that concurrent
    # scans of different projects do not collide.
    script_path = os.path.abspath(__file__)
    skill_dir = os.path.dirname(os.path.dirname(script_path))
    project_name = os.path.basename(root_path)
    path_hash = hashlib.md5(root_path.encode("utf-8")).hexdigest()[:8]
    default_output = os.path.join(skill_dir, ".tmp", "{}_{}".format(project_name, path_hash))
    if args.output_dir:
        output_dir = os.path.realpath(args.output_dir)
    else:
        output_dir = default_output
    os.makedirs(output_dir, exist_ok=True)

    result, bundle_hash = scan_vulnerability(root_path, chat_id=args.chat_id)

    # Store the bundle hash in the result for later parsing and repair steps.
    result["bundleHash"] = bundle_hash

    output_file = os.path.join(output_dir, "scan_result.json")
    with open(output_file, "w", encoding="utf-8") as out:
        json.dump(result, out, ensure_ascii=False, indent=2)
    print(output_file)
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
if __name__ == "__main__":
|
|
316
|
-
main()
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
# query-content — 查询文档内容
|
|
2
|
-
|
|
3
|
-
查询知识库文档内容,自动转换为 Markdown 格式输出,并可自动下载文档中引用的图片和附件。
|
|
4
|
-
|
|
5
|
-
## CLI 用法
|
|
6
|
-
|
|
7
|
-
```bash
|
|
8
|
-
python3 scripts/ku_operator.py query-content --doc-id <文档ID>
|
|
9
|
-
python3 scripts/ku_operator.py query-content --url <文档URL>
|
|
10
|
-
|
|
11
|
-
# 禁用自动下载资源
|
|
12
|
-
python3 scripts/ku_operator.py query-content --doc-id <文档ID> --no-download-assets
|
|
13
|
-
|
|
14
|
-
# 指定资源保存目录
|
|
15
|
-
python3 scripts/ku_operator.py query-content --doc-id <文档ID> --save-assets-dir ./my_assets
|
|
16
|
-
|
|
17
|
-
# 输出原始 JSON(含 blockId,用于 edit-content 操作)
|
|
18
|
-
python3 scripts/ku_operator.py query-content --doc-id <文档ID> --raw-json
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
## 参数表
|
|
22
|
-
|
|
23
|
-
| 参数 | 必填 | 说明 |
|
|
24
|
-
|------|------|------|
|
|
25
|
-
| `--doc-id` | 与 `--url` 二选一 | 文档 ID |
|
|
26
|
-
| `--url` | 与 `--doc-id` 二选一 | 文档 URL |
|
|
27
|
-
| `--save-assets-dir` | 否 | 图片/附件保存目录,默认 `./ku_assets` |
|
|
28
|
-
| `--no-download-assets` | 否 | 禁用自动下载图片/附件(默认会自动下载) |
|
|
29
|
-
| `--raw-json` | 否 | 输出原始 JSON block 结构(含 blockId),不转换为 Markdown |
|
|
30
|
-
|
|
31
|
-
## 输出说明
|
|
32
|
-
|
|
33
|
-
### 默认模式(Markdown 输出)
|
|
34
|
-
|
|
35
|
-
- 直接输出 **Markdown 格式** 的文档内容(通过内部 jsonToMd 服务自动转换),可直接阅读
|
|
36
|
-
- **自动资源下载**:脚本会自动提取 Markdown 中的远程图片和附件 URL,下载到本地 `{save-assets-dir}/{doc_id}/` 目录,并将 Markdown 中的远程 URL 替换为本地路径。下载摘要会输出到 stderr
|
|
37
|
-
|
|
38
|
-
> **重要**:读取文档后,请关注 stderr 中输出的资源下载摘要。如果文档包含图片,这些图片已下载到本地,在分析文档时应结合图片内容(使用 Read 工具读取图片文件)进行理解,不要忽略图片中的信息。
|
|
39
|
-
|
|
40
|
-
### --raw-json 模式
|
|
41
|
-
|
|
42
|
-
- 输出文档内容的 **原始 JSON block 数组**,每个 block 包含 `blockId`、`type` 等完整字段
|
|
43
|
-
- 不经过 jsonToMd 转换,不下载资源
|
|
44
|
-
- 主要用于获取 `blockId` 以配合 `edit-content` 命令进行精确编辑(如表格替换、定点插入/删除)
|
|
45
|
-
|
|
46
|
-
输出格式示例:
|
|
47
|
-
|
|
48
|
-
```json
|
|
49
|
-
[
|
|
50
|
-
{
|
|
51
|
-
"type": "heading",
|
|
52
|
-
"level": 2,
|
|
53
|
-
"blockId": "docyg-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
|
|
54
|
-
"children": [{"text": "标题内容"}]
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"type": "paragraph",
|
|
58
|
-
"blockId": "docyg-yyyyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy",
|
|
59
|
-
"children": [{"text": "段落内容"}]
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
"type": "table",
|
|
63
|
-
"blockId": "docyg-zzzzzzzz-zzzz-zzzz-zzzz-zzzzzzzzzzzz",
|
|
64
|
-
"data": {"headless": true, "width": [150, 150]},
|
|
65
|
-
"children": [...]
|
|
66
|
-
}
|
|
67
|
-
]
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
## 使用示例
|
|
71
|
-
|
|
72
|
-
### 通过 doc-id 查询
|
|
73
|
-
|
|
74
|
-
```bash
|
|
75
|
-
python3 scripts/ku_operator.py query-content --doc-id OQAehZAod_HVHC
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
### 通过完整 URL 查询
|
|
79
|
-
|
|
80
|
-
```bash
|
|
81
|
-
python3 scripts/ku_operator.py query-content \
|
|
82
|
-
--url "https://ku.baidu-int.com/knowledge/HFVrC2hT9H/pKzYIZbmZx/B8wSneaLSC/OQAehZAod_HVHC"
|
|
83
|
-
```
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
# query-repo — 查询知识库目录
|
|
2
|
-
|
|
3
|
-
查询知识库的目录结构,以文本树形格式输出,支持分页。
|
|
4
|
-
|
|
5
|
-
## CLI 用法
|
|
6
|
-
|
|
7
|
-
```bash
|
|
8
|
-
# 查询根目录(默认行为)
|
|
9
|
-
python3 scripts/ku_operator.py query-repo --repo-guid <知识库ID>
|
|
10
|
-
|
|
11
|
-
# 查询某个目录下的直接子文档
|
|
12
|
-
python3 scripts/ku_operator.py query-repo --repo-guid <知识库ID> --parent-doc-guid <父文档ID>
|
|
13
|
-
|
|
14
|
-
# 大目录翻页
|
|
15
|
-
python3 scripts/ku_operator.py query-repo --repo-guid <知识库ID> --page 2
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
## 参数表
|
|
19
|
-
|
|
20
|
-
| 参数 | 必填 | 说明 |
|
|
21
|
-
|------|------|------|
|
|
22
|
-
| `--repo-guid` | 是 | 知识库 ID |
|
|
23
|
-
| `--parent-doc-guid` | 否 | 父文档 ID,不传则默认查询**根目录**下的直接子文档 |
|
|
24
|
-
| `--page` | 否 | 页码,默认 1(每页固定 100 条) |
|
|
25
|
-
|
|
26
|
-
## 输出说明
|
|
27
|
-
|
|
28
|
-
以文本树形格式输出,示例:
|
|
29
|
-
|
|
30
|
-
```
|
|
31
|
-
知识库 Xw0t0FBvVy 根目录(共 3 项):
|
|
32
|
-
|
|
33
|
-
📄 欢迎使用 (GgD6PTVjSmZEC3)
|
|
34
|
-
📁 浏览器相关 (gsrJkBZ6XlyGRl) [2 子文档]
|
|
35
|
-
📄 接口说明 (vtUh01DEnyYDA6)
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
- `📁` 表示有子文档(可通过其 docGuid 作为 `--parent-doc-guid` 进一步展开查看)
|
|
39
|
-
- `📄` 表示叶子文档
|
|
40
|
-
- 括号内为 `docGuid`,可直接用于后续的 `query-content`、`create-doc`、`move-doc` 等操作
|
|
41
|
-
- 当目录内容超过一页时,末尾会提示使用 `--page` 翻页
|
|
42
|
-
|
|
43
|
-
## 使用示例
|
|
44
|
-
|
|
45
|
-
### 查询根目录
|
|
46
|
-
|
|
47
|
-
```bash
|
|
48
|
-
python3 scripts/ku_operator.py query-repo --repo-guid Xw0t0FBvVy
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
### 展开子目录
|
|
52
|
-
|
|
53
|
-
```bash
|
|
54
|
-
python3 scripts/ku_operator.py query-repo \
|
|
55
|
-
--repo-guid Xw0t0FBvVy \
|
|
56
|
-
--parent-doc-guid gsrJkBZ6XlyGRl
|
|
57
|
-
```
|