jarvis-ai-assistant 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +114 -6
  3. jarvis/jarvis_agent/agent_manager.py +3 -0
  4. jarvis/jarvis_agent/jarvis.py +45 -9
  5. jarvis/jarvis_agent/run_loop.py +6 -1
  6. jarvis/jarvis_agent/task_planner.py +219 -0
  7. jarvis/jarvis_c2rust/__init__.py +13 -0
  8. jarvis/jarvis_c2rust/cli.py +405 -0
  9. jarvis/jarvis_c2rust/collector.py +209 -0
  10. jarvis/jarvis_c2rust/library_replacer.py +933 -0
  11. jarvis/jarvis_c2rust/llm_module_agent.py +1265 -0
  12. jarvis/jarvis_c2rust/scanner.py +1671 -0
  13. jarvis/jarvis_c2rust/transpiler.py +1236 -0
  14. jarvis/jarvis_code_agent/code_agent.py +151 -18
  15. jarvis/jarvis_data/config_schema.json +13 -3
  16. jarvis/jarvis_sec/README.md +180 -0
  17. jarvis/jarvis_sec/__init__.py +674 -0
  18. jarvis/jarvis_sec/checkers/__init__.py +33 -0
  19. jarvis/jarvis_sec/checkers/c_checker.py +1269 -0
  20. jarvis/jarvis_sec/checkers/rust_checker.py +367 -0
  21. jarvis/jarvis_sec/cli.py +110 -0
  22. jarvis/jarvis_sec/prompts.py +324 -0
  23. jarvis/jarvis_sec/report.py +260 -0
  24. jarvis/jarvis_sec/types.py +20 -0
  25. jarvis/jarvis_sec/workflow.py +513 -0
  26. jarvis/jarvis_tools/cli/main.py +1 -0
  27. jarvis/jarvis_tools/execute_script.py +1 -1
  28. jarvis/jarvis_tools/read_code.py +11 -1
  29. jarvis/jarvis_tools/read_symbols.py +129 -0
  30. jarvis/jarvis_tools/registry.py +9 -1
  31. jarvis/jarvis_tools/sub_agent.py +4 -3
  32. jarvis/jarvis_tools/sub_code_agent.py +3 -3
  33. jarvis/jarvis_utils/config.py +28 -6
  34. jarvis/jarvis_utils/git_utils.py +39 -0
  35. jarvis/jarvis_utils/utils.py +150 -7
  36. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/METADATA +13 -1
  37. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/RECORD +41 -22
  38. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/entry_points.txt +4 -0
  39. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/WHEEL +0 -0
  40. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/licenses/LICENSE +0 -0
  41. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1269 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ OpenHarmony 安全演进多Agent套件 —— C/C++ 启发式安全检查器(阶段一)
4
+
5
+ 目标与范围:
6
+ - 聚焦内存管理、缓冲区操作、错误处理三类基础安全问题,提供可解释的启发式检测与置信度评估。
7
+ - 面向 C/C++ 与头文件(.c/.cpp/.h/.hpp)。
8
+
9
+ 输出约定:
10
+ - 返回 jarvis.jarvis_sec.workflow.Issue 列表(保持结构化,便于聚合评分与报告生成)。
11
+ - 置信度区间 [0,1],基于命中规则与上下文线索加权计算;严重性(severity)分为 high/medium/low。
12
+
13
+ 使用方式(示例):
14
+ - from jarvis.jarvis_sec.checkers.c_checker import analyze_files
15
+ - issues = analyze_files("./repo", ["src/a.c", "include/a.h"])
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from pathlib import Path
22
+ from typing import Iterable, List, Optional, Sequence, Tuple
23
+
24
+ from jarvis.jarvis_sec.types import Issue
25
+
26
+
27
+ # ---------------------------
28
+ # 规则库(正则表达式)
29
+ # ---------------------------
30
+
31
# Unbounded string/format APIs (call sites only: name followed by "(").
RE_UNSAFE_API = re.compile(
    r"\b(strcpy|strcat|gets|sprintf|vsprintf|scanf)\s*\(",
    re.IGNORECASE,
)
# Length-taking copy routines whose size argument deserves review.
RE_BOUNDARY_FUNCS = re.compile(
    r"\b(memcpy|memmove|strncpy|strncat)\s*\(",
    re.IGNORECASE,
)
# Allocation / deallocation keywords and functions.
RE_MEM_MGMT = re.compile(
    r"\b(malloc|calloc|realloc|free|new\s+|delete\b)",
    re.IGNORECASE,
)
# stdio and POSIX-style I/O call sites.
RE_IO_API = re.compile(
    r"\b(fopen|fclose|fread|fwrite|read|write|open|close)\s*\(",
    re.IGNORECASE,
)

# Added: risky API patterns for format strings, insecure temp files, command execution, etc.
RE_PRINTF_LIKE = re.compile(r"\b(printf|sprintf|snprintf|vsprintf|vsnprintf)\s*\(", re.IGNORECASE)
RE_FPRINTF = re.compile(r"\bfprintf\s*\(", re.IGNORECASE)
RE_INSECURE_TMP = re.compile(r"\b(tmpnam|tempnam|mktemp)\s*\(", re.IGNORECASE)
RE_SYSTEM_LIKE = re.compile(r"\b(system|popen)\s*\(", re.IGNORECASE)
RE_EXEC_LIKE = re.compile(r"\b(execvp|execlp|execvpe|execl|execve|execv)\s*\(", re.IGNORECASE)
# Captures the string literal that directly follows the opening parenthesis.
RE_SCANF_CALL = re.compile(r'\b(?:[fs]?scanf)\s*\(\s*"([^"]*)"', re.IGNORECASE)
# Thread / lock related (group 1 is the mutex identifier passed as &name).
RE_PTHREAD_LOCK = re.compile(r"\bpthread_mutex_lock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?", re.IGNORECASE)
RE_PTHREAD_UNLOCK = re.compile(r"\bpthread_mutex_unlock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?", re.IGNORECASE)
# Other dangerous-usage patterns.
RE_ATOI_FAMILY = re.compile(r"\b(atoi|atol|atoll|atof)\s*\(", re.IGNORECASE)
RE_RAND = re.compile(r"\b(rand|srand)\s*\(", re.IGNORECASE)
RE_STRTOK = re.compile(r"\bstrtok\s*\(", re.IGNORECASE)
# open(path, ...O_CREAT..., <octal mode>) — group 1 is the octal mode literal.
RE_OPEN_PERMISSIVE = re.compile(r"\bopen\s*\(\s*[^,]+,\s*[^,]*O_CREAT[^,]*,\s*(0[0-7]{3,4})\s*\)", re.IGNORECASE)
# fopen(path, "<mode>") — group 1 is the mode string.
RE_FOPEN_MODE = re.compile(r'\bfopen\s*\(\s*[^,]+,\s*"([^"]+)"\s*\)', re.IGNORECASE)
# NOTE: "identifier =" also matches the first "=" of "==".
RE_GENERIC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*")
RE_FREE_CALL_ANY = re.compile(r"\bfree\s*\(\s*([^)]+?)\s*\)", re.IGNORECASE)
# Extension: more dangerous-usage patterns.
RE_ALLOCA = re.compile(r"\balloca\s*\(\s*([^)]+)\s*\)", re.IGNORECASE)
# Array declaration "type name[expr];" — group 1 is the size expression.
RE_VLA_DECL = re.compile(
    r"\b(?:const\s+|volatile\s+|static\s+|register\s+|unsigned\s+|signed\s+)?[A-Za-z_]\w*(?:\s+\*|\s+)+[A-Za-z_]\w*\s*\[\s*([^\]]+)\s*\]\s*;",
    re.IGNORECASE,
)
RE_PTHREAD_RET = re.compile(
    r"\b(pthread_(?:mutex_(?:lock|trylock|timedlock)|create|cond_(?:wait|timedwait)|join|detach))\s*\(",
    re.IGNORECASE,
)
RE_PTHREAD_COND_WAIT = re.compile(r"\bpthread_cond_(?:timed)?wait\s*\(", re.IGNORECASE)
RE_PTHREAD_CREATE = re.compile(r"\bpthread_create\s*\(\s*&\s*([A-Za-z_]\w*)\s*,", re.IGNORECASE)
RE_PTHREAD_JOIN = re.compile(r"\bpthread_join\s*\(\s*([A-Za-z_]\w*)\s*,", re.IGNORECASE)
RE_PTHREAD_DETACH = re.compile(r"\bpthread_detach\s*\(\s*([A-Za-z_]\w*)\s*\)", re.IGNORECASE)
RE_INET_LEGACY = re.compile(r"\b(inet_addr|inet_aton)\s*\(", re.IGNORECASE)
RE_TIME_UNSAFE = re.compile(r"\b(asctime|ctime|localtime|gmtime)\s*\(", re.IGNORECASE)
RE_GETENV = re.compile(r'\bgetenv\s*\(\s*"[^"]*"\s*\)', re.IGNORECASE)

# Auxiliary regexes.
# "p = realloc(p, ..." — the back-reference requires the same identifier on both sides.
RE_REALLOC_ASSIGN_BACK = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*realloc\s*\(\s*\1\s*,", re.IGNORECASE
)
RE_MALLOC_ASSIGN = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*malloc\s*\(", re.IGNORECASE
)
RE_CALLOC_ASSIGN = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*calloc\s*\(", re.IGNORECASE
)
RE_NEW_ASSIGN = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*new\b", re.IGNORECASE
)
RE_DEREF = re.compile(
    r"(\*|->)\s*[A-Za-z_]\w*|\b[A-Za-z_]\w*\s*\[", re.IGNORECASE
)
RE_NULL_CHECK = re.compile(
    r"\bif\s*\(\s*(!\s*)?[A-Za-z_]\w*\s*(==|!=)\s*NULL\s*\)|\bif\s*\(\s*[A-Za-z_]\w*\s*\)", re.IGNORECASE
)
# Requires the trailing ";" and a bare identifier argument.
RE_FREE_VAR = re.compile(r"free\s*\(\s*([A-Za-z_]\w*)\s*\)\s*;", re.IGNORECASE)
RE_USE_VAR = re.compile(r"\b([A-Za-z_]\w*)\b")
RE_STRLEN_IN_SIZE = re.compile(r"\bstrlen\s*\(", re.IGNORECASE)
# Matches sizeof(*name).
RE_SIZEOF_PTR = re.compile(r"\bsizeof\s*\(\s*\*\s*[A-Za-z_]\w*\s*\)", re.IGNORECASE)
RE_STRNCPY = re.compile(r"\bstrncpy\s*\(", re.IGNORECASE)
RE_STRNCAT = re.compile(r"\bstrncat\s*\(", re.IGNORECASE)
109
+
110
+
111
+ # ---------------------------
112
+ # 公共工具
113
+ # ---------------------------
114
+
115
+ def _safe_line(lines: Sequence[str], idx: int) -> str:
116
+ if 1 <= idx <= len(lines):
117
+ return lines[idx - 1]
118
+ return ""
119
+
120
+
121
+ def _strip_line(s: str, max_len: int = 200) -> str:
122
+ s = s.strip().replace("\t", " ")
123
+ return s if len(s) <= max_len else s[: max_len - 3] + "..."
124
+
125
+
126
+ def _window(lines: Sequence[str], center: int, before: int = 3, after: int = 3) -> List[Tuple[int, str]]:
127
+ start = max(1, center - before)
128
+ end = min(len(lines), center + after)
129
+ return [(i, _safe_line(lines, i)) for i in range(start, end + 1)]
130
+
131
+
132
+ def _has_null_check_around(var: str, lines: Sequence[str], line_no: int, radius: int = 5) -> bool:
133
+ for i, s in _window(lines, line_no, before=radius, after=radius):
134
+ # 粗略判定:出现 if(ptr) / if(ptr != NULL) / if(NULL != ptr) 等
135
+ if re.search(rf"\bif\s*\(\s*{re.escape(var)}\s*\)", s):
136
+ return True
137
+ if re.search(rf"\bif\s*\(\s*{re.escape(var)}\s*(==|!=)\s*NULL\s*\)", s):
138
+ return True
139
+ if re.search(rf"\bif\s*\(\s*NULL\s*(==|!=)\s*{re.escape(var)}\s*\)", s):
140
+ return True
141
+ return False
142
+
143
+
144
+ def _has_len_bound_around(lines: Sequence[str], line_no: int, radius: int = 3) -> bool:
145
+ for _, s in _window(lines, line_no, before=radius, after=radius):
146
+ # 检测是否出现长度上界/检查(非常粗略)
147
+ if any(k in s for k in ["sizeof(", "BUFFER_SIZE", "MAX_", "min(", "clamp(", "snprintf", "strlcpy", "strlcat"]):
148
+ return True
149
+ return False
150
+
151
+
152
+ def _severity_from_confidence(conf: float, base: str) -> str:
153
+ # 基于基类目提供缺省严重度调整
154
+ if conf >= 0.8:
155
+ return "high"
156
+ if conf >= 0.6:
157
+ return "medium"
158
+ return "low"
159
+
160
+
161
+ # ---------------------------
162
+ # 具体验证规则
163
+ # ---------------------------
164
+
165
def _rule_unsafe_api(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report the first unbounded string/format API call on each line.

    Confidence starts at 0.85 and gains 0.05 when no length-bound hint is
    found within two lines; the reported value is capped at 0.95.
    """
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        hit = RE_UNSAFE_API.search(text)
        if hit is None:
            continue
        score = 0.85
        bounded = _has_len_bound_around(lines, line_no, radius=2)
        if not bounded:
            score += 0.05
        found.append(
            Issue(
                language="c/cpp",
                category="unsafe_api",
                pattern=hit.group(1),
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="使用不安全/高风险字符串API,可能导致缓冲区溢出或格式化风险。",
                suggestion="替换为带边界的安全API(如 snprintf/strlcpy 等)或加入显式长度检查。",
                confidence=min(score, 0.95),
                severity=_severity_from_confidence(score, "unsafe_api"),
            )
        )
    return found
191
+
192
+
193
def _rule_boundary_funcs(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report memcpy/memmove/strncpy/strncat calls whose length needs review.

    Base confidence 0.65; +0.15 when the line uses ``strlen(`` or
    ``sizeof(*ptr)``; +0.1 when no length-bound hint is found within two
    lines. The reported value is capped at 0.95.
    """
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        hit = RE_BOUNDARY_FUNCS.search(text)
        if hit is None:
            continue
        score = 0.65
        # Length derived from strlen or from sizeof(*ptr) is treated as less reliable.
        suspicious_size = RE_STRLEN_IN_SIZE.search(text) or RE_SIZEOF_PTR.search(text)
        if suspicious_size:
            score += 0.15
        if not _has_len_bound_around(lines, line_no, radius=2):
            score += 0.1
        found.append(
            Issue(
                language="c/cpp",
                category="buffer_overflow",
                pattern=hit.group(1),
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="缓冲区操作涉及长度/边界,需确认长度来源是否可靠,避免越界。",
                suggestion="核对目标缓冲区大小与拷贝长度;对外部输入进行校验;优先使用安全封装。",
                confidence=min(score, 0.95),
                severity=_severity_from_confidence(score, "buffer_overflow"),
            )
        )
    return found
222
+
223
+
224
def _rule_realloc_assign_back(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``p = realloc(p, ...)``, which overwrites the only copy of ``p``.

    Base confidence 0.8, +0.1 when no NULL test on the variable is found
    within three lines; the reported value is capped at 0.95.
    """
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        hit = RE_REALLOC_ASSIGN_BACK.search(text)
        if hit is None:
            continue
        var = hit.group(1)
        checked = _has_null_check_around(var, lines, line_no, radius=3)
        score = 0.8
        if not checked:
            score += 0.1
        found.append(
            Issue(
                language="c/cpp",
                category="memory_mgmt",
                pattern="realloc_overwrite",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description=f"realloc 直接覆盖原指针 {var},若失败将导致原内存泄漏。",
                suggestion="使用临时指针接收 realloc 返回值,判空成功后再赋值回原指针。",
                confidence=min(score, 0.95),
                severity=_severity_from_confidence(score, "memory_mgmt"),
            )
        )
    return found
250
+
251
+
252
def _rule_malloc_no_null_check(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``var = malloc(...)`` / ``calloc(...)`` / ``new`` assignments.

    An issue is emitted for every match. Confidence starts at 0.55; when no
    NULL test on the variable is found within four lines it gains 0.25 if the
    variable is used (``->``, ``[`` or ``(``) in the following six lines, and
    0.1 otherwise. The reported value is capped at 0.9.
    """
    alloc_patterns = (RE_MALLOC_ASSIGN, RE_CALLOC_ASSIGN, RE_NEW_ASSIGN)
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        for matcher in alloc_patterns:
            hit = matcher.search(text)
            if hit is None:
                continue
            var = hit.group(1)
            checked = _has_null_check_around(var, lines, line_no, radius=4)
            # Rough "is it used soon after" probe on the lines below the allocation.
            use_re = re.compile(rf"\b{re.escape(var)}\b(\s*(->|\[|\())")
            used = any(
                use_re.search(later)
                for pos, later in _window(lines, line_no, before=0, after=6)
                if pos != line_no
            )
            score = 0.55
            if not checked:
                score += 0.25 if used else 0.1
            found.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="alloc_no_null_check",
                    file=relpath,
                    line=line_no,
                    evidence=_strip_line(text),
                    description=f"内存/对象分配给 {var} 后可能未检查是否成功(NULL 检查缺失)。",
                    suggestion="在使用前检查分配结果是否为 NULL,并在错误路径上释放已获取的资源。",
                    confidence=min(score, 0.9),
                    severity=_severity_from_confidence(score, "memory_mgmt"),
                )
            )
    return found
290
+
291
+
292
def _uaf_mentioned_after_free(lines: Sequence[str], free_line: int, free_end: int, var: str) -> bool:
    """Tell whether ``var`` is used after a ``free(var);`` call.

    Scanning starts at column ``free_end`` of the 1-based line ``free_line``
    and continues to the end of the file. Only the first later mention is
    inspected: a plain re-assignment (``var = ...``, not ``==`` and not
    through ``*``, ``.`` or ``->``) means the pointer was reset, so it is not
    counted as a use.
    """
    name = re.escape(var)
    mention = re.compile(rf"\b{name}\b")
    rebind = re.compile(rf"(?<![\w*.>]){name}\s*=(?!=)")
    start = free_end
    for line_no in range(free_line, len(lines) + 1):
        text = lines[line_no - 1]
        hit = mention.search(text, start)
        if hit is not None:
            return rebind.match(text, hit.start()) is None
        start = 0
    return False


def _rule_uaf_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report variables that are mentioned again after ``free(var);``.

    Each variable is reported at most once, at the first ``free`` that is
    followed by a use. ``free(p); p = NULL;`` is not reported, because the
    first later mention is a re-assignment. Issues are emitted in source
    order, so the result is stable between runs. This is a coarse lead that
    needs manual confirmation.
    """
    issues: List[Issue] = []
    reported = set()
    for idx, s in enumerate(lines, start=1):
        for m in RE_FREE_VAR.finditer(s):
            v = m.group(1)
            if v in reported:
                continue
            if not _uaf_mentioned_after_free(lines, idx, m.end(), v):
                continue
            reported.add(v)
            issues.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="use_after_free_suspect",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"变量 {v} 在 free 后可能仍被使用(UAF 线索,需人工确认)。",
                    suggestion="free 后将指针置 NULL;严格管理生命周期;增加动态/静态检测。",
                    confidence=0.6,
                    severity="high",
                )
            )
    return issues
320
+
321
+
322
def _rule_unchecked_io(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report I/O call sites whose return value may go unchecked.

    Base confidence 0.5, +0.15 when neither the call line nor the next two
    lines contain an ``if (`` or a comparison operator. The reported value is
    capped at 0.75.
    """
    check_hint = re.compile(r"\bif\s*\(|>=|<=|==|!=|<|>")
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if RE_IO_API.search(text) is None:
            continue
        # The call line plus up to two following lines (slice clamps at EOF).
        following = " ".join(lines[line_no - 1 : line_no + 2])
        score = 0.5
        if check_hint.search(following) is None:
            score += 0.15
        found.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="io_call",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="I/O/系统调用可能未检查返回值,存在错误处理缺失风险。",
                suggestion="检查返回值/errno;在错误路径上释放资源(句柄/内存/锁)。",
                confidence=min(score, 0.75),
                severity=_severity_from_confidence(score, "error_handling"),
            )
        )
    return found
347
+
348
+
349
def _rule_strncpy_no_nullterm(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report strncpy/strncat calls that may leave the target unterminated.

    Base confidence 0.55, +0.15 when the window (one line before, two after)
    shows no manual NUL termination or "length - 1" style handling. The
    reported value is capped at 0.75.
    """
    terminator_hint = re.compile(r"\\0|'\0'|\"\\0\"|len\s*-\s*1|sizeof\s*\(\s*\w+\s*\)\s*-\s*1")
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if not (RE_STRNCPY.search(text) or RE_STRNCAT.search(text)):
            continue
        context = " ".join(t for _, t in _window(lines, line_no, before=1, after=2))
        score = 0.55
        if terminator_hint.search(context) is None:
            score += 0.15
        found.append(
            Issue(
                language="c/cpp",
                category="buffer_overflow",
                pattern="strncpy/strncat",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="使用 strncpy/strncat 可能未自动添加 NUL 终止,导致潜在字符串未终止风险。",
                suggestion="确保目标缓冲区以 '\\0' 终止(例如手动结尾或采用更安全 API)。",
                confidence=min(score, 0.75),
                severity=_severity_from_confidence(score, "buffer_overflow"),
            )
        )
    return found
374
+
375
+
376
+ # ---------------------------
377
+ # 对外主入口
378
+ # ---------------------------
379
+
380
+ # ---------------------------
381
+ # 额外规则(新增)
382
+ # ---------------------------
383
+
384
def _format_arg_offset(s: str, open_paren: int, arg_index: int) -> int:
    """Return the offset of the first character of a call's N-th argument.

    ``open_paren`` is the index of the call's "(" in ``s`` and ``arg_index``
    is 0-based. Arguments are split on top-level commas only: nested
    brackets and string/char literals are skipped. Returns -1 when the
    argument is not present on this line (call closed early or continued on
    a following line).
    """
    depth = 0
    quote = ""
    current = 0
    i = open_paren + 1
    while i < len(s):
        ch = s[i]
        if quote:
            if ch == "\\":
                i += 1  # skip the escaped character inside a literal
            elif ch == quote:
                quote = ""
        elif ch.isspace():
            pass
        elif ch in ")]}":
            if depth == 0:
                return -1
            depth -= 1
        elif current == arg_index and depth == 0:
            return i
        elif ch in "\"'":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch == "," and depth == 0:
            current += 1
        i += 1
    return -1


def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report printf-family calls whose format argument is not a string literal.

    The format argument position depends on the function: ``printf`` takes it
    first; ``sprintf``/``vsprintf``/``fprintf`` second (after the buffer or
    stream); ``snprintf``/``vsnprintf`` third (after buffer and size). At
    most one issue is emitted per line. Calls whose format argument is not on
    the same line are not reported.
    """
    fmt_position = {"printf": 0, "sprintf": 1, "vsprintf": 1, "snprintf": 2, "vsnprintf": 2}
    issues: List[Issue] = []
    for idx, s in enumerate(lines, start=1):
        calls = [(m, fmt_position.get(m.group(1).lower(), 0)) for m in RE_PRINTF_LIKE.finditer(s)]
        calls.extend((m, 1) for m in RE_FPRINTF.finditer(s))
        flagged = False
        for m, position in calls:
            # Both regexes end with "\(", so the "(" sits at m.end() - 1.
            offset = _format_arg_offset(s, m.end() - 1, position)
            if offset != -1 and s[offset] != '"':
                flagged = True
                break
        if flagged:
            issues.append(
                Issue(
                    language="c/cpp",
                    category="unsafe_usage",
                    pattern="format_string",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description="格式化字符串参数不是字面量,可能导致格式化字符串漏洞。",
                    suggestion="使用常量格式串并对外部输入进行参数化处理;避免将未验证的输入作为格式串。",
                    confidence=0.8,
                    severity="high",
                )
            )
    return issues
434
+
435
+
436
def _rule_insecure_tmpfile(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report every line that calls tmpnam, tempnam or mktemp."""
    return [
        Issue(
            language="c/cpp",
            category="unsafe_usage",
            pattern="insecure_tmpfile",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="使用不安全的临时文件API(tmpnam/tempnam/mktemp)可能导致竞态条件与劫持风险。",
            suggestion="使用 mkstemp/mkdtemp 或安全封装,并设置合适的权限。",
            confidence=0.85,
            severity="high",
        )
        for line_no, text in enumerate(lines, start=1)
        if RE_INSECURE_TMP.search(text)
    ]
458
+
459
+
460
def _rule_command_execution(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report command-execution calls that may carry injected input.

    ``system``/``popen`` are reported when their first argument on the same
    line does not start with a string literal; every ``exec*`` call matched
    by ``RE_EXEC_LIKE`` is reported unconditionally. One issue per line.
    """
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        suspicious = False
        shell_call = RE_SYSTEM_LIKE.search(text)
        if shell_call is not None:
            # The regex ends with "\(", so the first argument starts at end().
            pos = shell_call.end()
            while pos < len(text) and text[pos].isspace():
                pos += 1
            suspicious = pos < len(text) and text[pos] != '"'
        if not suspicious and RE_EXEC_LIKE.search(text):
            # Conservative: exec* argument construction is hard to judge here.
            suspicious = True
        if not suspicious:
            continue
        found.append(
            Issue(
                language="c/cpp",
                category="unsafe_usage",
                pattern="command_exec",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="外部命令执行可能使用了非字面量参数,存在命令注入风险。",
                suggestion="避免拼接命令,使用参数化接口或受控白名单;严格校验/转义外部输入。",
                confidence=0.7,
                severity="high",
            )
        )
    return found
497
+
498
+
499
def _rule_scanf_no_width(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report scanf/fscanf/sscanf calls that use ``%s`` without a width.

    Only literal format strings on the same line are inspected. For
    ``scanf`` the format is the first argument; for ``fscanf``/``sscanf`` it
    is the second one (after the stream or source string). A format is
    reported when it still contains a bare ``%s`` after removing ``%%``
    escapes, even if other conversions in it do carry a width.
    """
    # fscanf/sscanf: skip one leading argument (identifier/expression without
    # commas, or a string literal) before the format literal.
    second_arg_fmt = re.compile(
        r'\b[fs]scanf\s*\(\s*(?:"[^"]*"|[^,"]+)\s*,\s*"([^"]*)"',
        re.IGNORECASE,
    )
    issues: List[Issue] = []
    for idx, s in enumerate(lines, start=1):
        fmt = None
        m = RE_SCANF_CALL.search(s)
        if m is not None and m.group(0).lower().startswith("scanf"):
            fmt = m.group(1)
        else:
            m2 = second_arg_fmt.search(s)
            if m2 is not None:
                fmt = m2.group(1)
        if fmt is None:
            continue
        # "%10s" does not contain "%s", so any remaining "%s" is unbounded.
        if "%s" in fmt.replace("%%", ""):
            issues.append(
                Issue(
                    language="c/cpp",
                    category="buffer_overflow",
                    pattern="scanf_%s_no_width",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description="scanf/sscanf/fscanf 使用 %s 但未限制最大宽度,存在缓冲区溢出风险。",
                    suggestion="为 %s 指定最大宽度(如 \"%255s\"),或使用更安全的读取方式。",
                    confidence=0.75,
                    severity="high",
                )
            )
    return issues
527
+
528
+
529
def _malloc_call_args(s: str, open_paren: int):
    """Return the text between a call's "(" and its matching ")", or None.

    Parentheses are balanced, so nested calls such as ``strlen(x)`` stay
    inside the argument text. None is returned when the call is not closed
    on this line.
    """
    depth = 0
    for pos in range(open_paren, len(s)):
        ch = s[pos]
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return s[open_paren + 1 : pos]
    return None


def _rule_alloc_size_overflow(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``malloc`` calls whose size uses ``*`` without ``sizeof``.

    Only the first ``malloc(`` on a line is inspected, and only when the call
    is closed on the same line. Heuristic; needs manual confirmation.
    """
    malloc_call = re.compile(r"\bmalloc\s*\(", re.IGNORECASE)
    has_sizeof = re.compile(r"\bsizeof\s*\(")
    issues: List[Issue] = []
    for idx, s in enumerate(lines, start=1):
        m = malloc_call.search(s)
        if m is None:
            continue
        # The pattern ends with "\(", so the "(" sits at m.end() - 1.
        args = _malloc_call_args(s, m.end() - 1)
        if args is None:
            continue
        if "*" in args and not has_sizeof.search(args):
            issues.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="alloc_size_overflow",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description="malloc 大小计算包含乘法且未显式使用 sizeof,存在整数溢出或尺寸计算错误的风险。",
                    suggestion="使用 sizeof 计算元素大小并检查乘法是否可能溢出;引入范围/上界校验。",
                    confidence=0.6,
                    severity="medium",
                )
            )
    return issues
562
+
563
+
564
+ # ---------------------------
565
+ # 空指针/野指针/死锁 等新增规则
566
+ # ---------------------------
567
+
568
def _rule_possible_null_deref(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report pointer dereferences with no NULL test nearby.

    ``p->`` always counts as a dereference. ``*p`` counts only on lines
    without a type keyword, to skip most declarations. One issue is emitted
    per distinct variable per line, in order of first appearance on the line,
    when no NULL test on it is found within three lines. False positives are
    expected; confirm against context.
    """
    issues: List[Issue] = []
    re_arrow = re.compile(r"\b([A-Za-z_]\w*)\s*->")
    re_star = re.compile(r"(?<!\w)\*\s*([A-Za-z_]\w*)\b")
    type_kw = re.compile(r"\b(typedef|struct|union|enum|class|char|int|long|short|void|size_t|ssize_t|FILE)\b")
    for idx, s in enumerate(lines, start=1):
        vars_hit = [m.group(1) for m in re_arrow.finditer(s)]
        if "*" in s and not type_kw.search(s):
            # Declarations on the left of an assignment are hard to exclude;
            # they are conservatively included.
            vars_hit.extend(m.group(1) for m in re_star.finditer(s))
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # output does not depend on string hash randomisation.
        for v in dict.fromkeys(vars_hit):
            if _has_null_check_around(v, lines, idx, radius=3):
                continue
            issues.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="possible_null_deref",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"可能对指针 {v} 进行了解引用,但附近未见 NULL 检查,存在空指针解引用风险。",
                    suggestion="在使用指针前执行 NULL 判定;确保所有返回/赋值路径均进行了合法性检查。",
                    confidence=0.6,
                    severity="high",
                )
            )
    return issues
605
+
606
+
607
def _rule_uninitialized_ptr_use(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report pointers dereferenced before any visible initialisation.

    Candidates are declarations such as ``type *p;``: the line has ";" and
    "*", a type keyword, and neither "=" nor "(". For each candidate the next
    20 lines are scanned, stopping at the first of:

    - a write through the pointer at the start of a statement
      (``*p = ...``): reported, since it dereferences the pointer;
    - an assignment ``p = ...``: treated as initialisation (``p == ...``
      comparisons are not);
    - a read dereference ``p->`` or ``*p``: reported.
    """
    issues: List[Issue] = []
    candidates: List[Tuple[str, int]] = []  # (variable, declaration line)
    type_prefix = re.compile(r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double)\b")
    for idx, s in enumerate(lines, start=1):
        if ";" not in s or "(" in s or "=" in s:
            continue
        if "*" not in s:
            continue
        if not type_prefix.search(s):
            continue
        # Collect every "*name" on the line (handles "int *p, *q;").
        for m in re.finditer(r"\*\s*([A-Za-z_]\w*)\b", s):
            candidates.append((m.group(1), idx))

    for v, decl_line in candidates:
        name = re.escape(v)
        re_assign = re.compile(rf"\b{name}\s*=(?!=)")
        re_deref = re.compile(rf"\b{name}\s*->|(?<!\w)\*\s*{name}\b")
        # "*p = ..." as the first token of a statement writes through p.
        re_deref_write = re.compile(rf"(?:^|[;{{}}])\s*\*\s*{name}\s*=(?!=)")
        end = min(len(lines), decl_line + 20)
        deref_line = None
        for j in range(decl_line + 1, end + 1):
            sj = _safe_line(lines, j)
            assign = re_assign.search(sj)
            write = re_deref_write.search(sj)
            if write is not None and (assign is None or write.start() <= assign.start()):
                deref_line = j
                break
            if assign is not None:
                break
            if re_deref.search(sj):
                deref_line = j
                break
        if deref_line:
            issues.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="wild_pointer_deref",
                    file=relpath,
                    line=deref_line,
                    evidence=_strip_line(_safe_line(lines, deref_line)),
                    description=f"指针 {v} 声明后未见初始化即被解引用,可能为野指针使用。",
                    suggestion="在声明后立即将指针初始化为 NULL,并在使用前进行显式赋值与有效性校验。",
                    confidence=0.65,
                    severity="high",
                )
            )
    return issues
663
+
664
+
665
def _rule_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report common pthread mutex deadlock patterns (heuristic).

    - double_lock: a mutex is locked again while still on the held-lock stack.
    - lock_order_inversion: both (A -> B) and (B -> A) acquisition orders are
      seen; reported once per pair, at the line of the later-seen order.
    - missing_unlock_suspect: no matching unlock in the 50 lines after a lock.

    The held-lock stack runs across the whole file and is not reset at
    function boundaries, so false positives are possible.
    """
    issues: List[Issue] = []
    lock_stack: list[str] = []
    # First line on which each (held, acquired) ordering was observed.
    order_pairs: dict[tuple[str, str], int] = {}

    # Pass 1: acquisition order and double locking.
    for idx, s in enumerate(lines, start=1):
        m_lock = RE_PTHREAD_LOCK.search(s)
        if m_lock:
            mtx = m_lock.group(1)
            if mtx in lock_stack:
                issues.append(
                    Issue(
                        language="c/cpp",
                        category="error_handling",
                        pattern="double_lock",
                        file=relpath,
                        line=idx,
                        evidence=_strip_line(s),
                        description=f"互斥量 {mtx} 在未解锁的情况下被再次加锁,存在死锁风险。",
                        suggestion="避免对同一互斥量重复加锁;检查代码路径确保加锁/解锁严格匹配。",
                        confidence=0.8,
                        severity="high",
                    )
                )
            if lock_stack and lock_stack[-1] != mtx:
                order_pairs.setdefault((lock_stack[-1], mtx), idx)
            lock_stack.append(mtx)
            continue
        m_unlock = RE_PTHREAD_UNLOCK.search(s)
        if m_unlock:
            mtx = m_unlock.group(1)
            # Drop the most recent acquisition of this mutex (approximation).
            for k in range(len(lock_stack) - 1, -1, -1):
                if lock_stack[k] == mtx:
                    del lock_stack[k]
                    break

    # Pass 2: lock order inversion. Each lock line records at most one pair,
    # so the two orderings of a pair never share a line; requiring the
    # reverse ordering to come later reports each inversion exactly once.
    for (a, b), first_seen in order_pairs.items():
        reverse_seen = order_pairs.get((b, a))
        if reverse_seen is None or reverse_seen < first_seen:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="lock_order_inversion",
                file=relpath,
                line=reverse_seen,
                evidence=_strip_line(_safe_line(lines, reverse_seen)),
                description=f"检测到互斥量加锁顺序反转:({a} -> {b}) 与 ({b} -> {a}),存在死锁风险。",
                suggestion="统一多锁的获取顺序,制定全局锁等级或严格的加锁顺序规范。",
                confidence=0.7,
                severity="high",
            )
        )

    # Pass 3: possible missing unlock within a 50-line window after the lock.
    for idx, s in enumerate(lines, start=1):
        m_lock = RE_PTHREAD_LOCK.search(s)
        if not m_lock:
            continue
        mtx = m_lock.group(1)
        end = min(len(lines), idx + 50)
        unlocked = False
        for j in range(idx + 1, end + 1):
            m_unlock = RE_PTHREAD_UNLOCK.search(_safe_line(lines, j))
            if m_unlock and m_unlock.group(1) == mtx:
                unlocked = True
                break
        if not unlocked:
            issues.append(
                Issue(
                    language="c/cpp",
                    category="error_handling",
                    pattern="missing_unlock_suspect",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"在加锁 {mtx} 之后的邻近窗口内未检测到匹配解锁,可能存在缺失解锁的风险。",
                    suggestion="确保所有加锁路径都有配对的解锁;考虑使用 RAII/DEFER 风格避免遗漏。",
                    confidence=0.55,
                    severity="medium",
                )
            )
    return issues
766
+
767
+
768
+ # ---------------------------
769
+ # 其他危险用法规则(新增一批低误报)
770
+ # ---------------------------
771
+
772
def _rule_double_free_and_free_non_heap(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report double frees and frees of obviously non-heap memory.

    - free_non_heap: the argument starts with ``&`` (optionally after "(")
      or with a string literal.
    - double_free: a bare identifier is freed again and no assignment to it
      was recorded on a line at or after the previous free.

    ``free(NULL)`` / ``free(0)`` are ignored. Heuristic; complex cases still
    need manual review.
    """
    null_arg = re.compile(r"\(?\s*(NULL|0|\(void\s*\*\)\s*0)\s*\)?", re.IGNORECASE)
    address_of = re.compile(r"^\(?\s*&")
    identifier = re.compile(r"[A-Za-z_]\w*")

    found: List[Issue] = []
    freed_at = {}     # variable -> line of its most recent free
    assigned_at = {}  # variable -> line of its most recent "name =" match

    for line_no, text in enumerate(lines, start=1):
        # Assignments are recorded first so a same-line "p = NULL" counts.
        for assign in RE_GENERIC_ASSIGN.finditer(text):
            assigned_at[assign.group(1)] = line_no

        for call in RE_FREE_CALL_ANY.finditer(text):
            arg = call.group(1).strip()

            if null_arg.fullmatch(arg):
                continue

            if address_of.match(arg) or arg.lstrip().startswith('"'):
                found.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="free_non_heap",
                        file=relpath,
                        line=line_no,
                        evidence=_strip_line(text),
                        description="检测到对非堆内存的释放(如 &var 或字符串字面量),属于未定义行为。",
                        suggestion="仅释放由 malloc/calloc/realloc/new/new[] 获得的堆内存;避免对栈地址/字面量调用 free。",
                        confidence=0.85,
                        severity="high",
                    )
                )
                continue

            # Double-free tracking only applies to a single bare identifier.
            if not identifier.fullmatch(arg):
                continue
            var = arg
            previous = freed_at.get(var)
            if previous is not None and assigned_at.get(var, -1) < previous:
                found.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="double_free",
                        file=relpath,
                        line=line_no,
                        evidence=_strip_line(text),
                        description=f"指针 {var} 可能在未重新赋值/置空情况下被重复释放(double free)。",
                        suggestion="free 后将指针置 NULL;确保每块内存仅释放一次;理清所有权与释放路径。",
                        confidence=0.8,
                        severity="high",
                    )
                )
            freed_at[var] = line_no
    return found
839
+
840
+
841
def _rule_atoi_family(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report every line that calls atoi, atol, atoll or atof.

    These conversions offer no error or range reporting; the suggestion
    points to the strtol family instead.
    """
    return [
        Issue(
            language="c/cpp",
            category="input_validation",
            pattern="atoi_family",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="使用 atoi/atol/atoll/atof 缺乏错误与范围检查,容易产生解析错误或未定义行为。",
            suggestion="使用 strtol/strtoul/strtod 等并检查 errno 和 endptr;进行范围与格式校验。",
            confidence=0.65,
            severity="medium",
        )
        for line_no, text in enumerate(lines, start=1)
        if RE_ATOI_FAMILY.search(text)
    ]
864
+
865
+
866
def _rule_rand_insecure(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report rand/srand calls, rated higher near security-related words.

    Base confidence 0.55, +0.2 when the call line or either neighbouring line
    contains one of the sensitive keywords (case-insensitive). The reported
    value is capped at 0.8; severity is "high" from 0.7 upwards.
    """
    sensitive_words = ("token", "nonce", "secret", "password", "passwd", "key", "auth", "salt", "session", "otp")
    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if RE_RAND.search(text) is None:
            continue
        neighbourhood = " ".join(t for _, t in _window(lines, line_no, before=1, after=1)).lower()
        score = 0.55
        for word in sensitive_words:
            if word in neighbourhood:
                score += 0.2
                break
        if score >= 0.7:
            level = "high"
        else:
            level = "medium"
        found.append(
            Issue(
                language="c/cpp",
                category="crypto",
                pattern="rand_insecure",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="检测到 rand/srand,用于安全敏感场景可能不安全,易被预测。",
                suggestion="使用系统级 CSPRNG(如 getrandom/arc4random/openssl RAND_bytes),避免用于密钥/令牌生成。",
                confidence=min(score, 0.8),
                severity=level,
            )
        )
    return found
893
+
894
+
895
def _rule_strtok_nonreentrant(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report every line that calls strtok (``strtok_r`` is not matched)."""
    return [
        Issue(
            language="c/cpp",
            category="thread_safety",
            pattern="strtok_nonreentrant",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="使用 strtok 非重入且线程不安全,可能导致竞态或数据覆盖。",
            suggestion="使用 strtok_r(POSIX)或可重入/线程安全的分割方案。",
            confidence=0.6,
            severity="medium",
        )
        for line_no, text in enumerate(lines, start=1)
        if RE_STRTOK.search(text)
    ]
917
+
918
+
919
+ def _rule_open_permissive_perms(lines: Sequence[str], relpath: str) -> List[Issue]:
920
+ """
921
+ 检测过宽文件权限:
922
+ - open(..., O_CREAT, 0666/0777/...) 直接授予过宽权限
923
+ - fopen(..., "w"/"w+") 在安全敏感上下文可提示收紧权限(基于关键词启发)
924
+ """
925
+ issues: List[Issue] = []
926
+ sensitive_keys = ("key", "secret", "token", "passwd", "password", "cred", "config", "cert", "private", "id_rsa")
927
+ for idx, s in enumerate(lines, start=1):
928
+ m = RE_OPEN_PERMISSIVE.search(s)
929
+ if m:
930
+ mode = m.group(1)
931
+ issues.append(
932
+ Issue(
933
+ language="c/cpp",
934
+ category="insecure_permissions",
935
+ pattern="open_permissive_perms",
936
+ file=relpath,
937
+ line=idx,
938
+ evidence=_strip_line(s),
939
+ description=f"open 使用 O_CREAT 且权限 {mode} 过宽,存在敏感信息泄露风险。",
940
+ suggestion="显式使用更严格的权限(如 0600/0640),或设置合适 umask 后再创建文件。",
941
+ confidence=0.8,
942
+ severity="high",
943
+ )
944
+ )
945
+ # fopen 模式为写入且上下文敏感时,进行提醒
946
+ m2 = RE_FOPEN_MODE.search(s)
947
+ if m2:
948
+ mode = m2.group(1)
949
+ if "w" in mode:
950
+ window = " ".join(t for _, t in _window(lines, idx, before=1, after=1)).lower()
951
+ if any(k in window for k in sensitive_keys):
952
+ issues.append(
953
+ Issue(
954
+ language="c/cpp",
955
+ category="insecure_permissions",
956
+ pattern="fopen_write_sensitive",
957
+ file=relpath,
958
+ line=idx,
959
+ evidence=_strip_line(s),
960
+ description="fopen 以写入模式操作可能的敏感文件,需确认创建权限与 umask 设置是否足够严格。",
961
+ suggestion="确认运行态 umask;必要时使用 open+fchmod/umask 控制权限,或以 0600 创建后再放宽。",
962
+ confidence=0.55,
963
+ severity="medium",
964
+ )
965
+ )
966
+ return issues
967
+
968
+
969
+ # ---------------------------
970
+ # 更多危险用法规则(第二批)
971
+ # ---------------------------
972
+
973
+ def _rule_alloca_unbounded(lines: Sequence[str], relpath: str) -> List[Issue]:
974
+ """
975
+ 检测 alloca 使用非常量/未受控大小,可能导致栈耗尽或崩溃。
976
+ 仅在参数非纯数字常量、且不含 sizeof 时告警。
977
+ """
978
+ issues: List[Issue] = []
979
+ for idx, s in enumerate(lines, start=1):
980
+ m = RE_ALLOCA.search(s)
981
+ if not m:
982
+ continue
983
+ arg = m.group(1).strip()
984
+ # 纯数字常量或包含 sizeof 视为更安全
985
+ if re.fullmatch(r"\d+\s*", arg) or "sizeof" in arg:
986
+ continue
987
+ conf = 0.6
988
+ if re.search(r"(len|size|count|n)\b", arg, re.IGNORECASE):
989
+ conf += 0.1
990
+ issues.append(
991
+ Issue(
992
+ language="c/cpp",
993
+ category="memory_mgmt",
994
+ pattern="alloca_unbounded",
995
+ file=relpath,
996
+ line=idx,
997
+ evidence=_strip_line(s),
998
+ description="alloca 使用的大小不是编译期常量,可能导致未受控的栈分配与崩溃风险。",
999
+ suggestion="避免使用 alloca;改用堆分配并对大小做上界检查与错误处理。",
1000
+ confidence=min(conf, 0.8),
1001
+ severity="high" if conf >= 0.7 else "medium",
1002
+ )
1003
+ )
1004
+ return issues
1005
+
1006
+
1007
+ def _rule_vla_usage(lines: Sequence[str], relpath: str) -> List[Issue]:
1008
+ """
1009
+ 检测可变长度数组(VLA)使用:声明中使用变量/表达式作为数组长度。
1010
+ 仅在长度非纯数字常量时提示。
1011
+ """
1012
+ issues: List[Issue] = []
1013
+ type_prefix = re.compile(r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double|size_t|ssize_t)\b")
1014
+ for idx, s in enumerate(lines, start=1):
1015
+ if ";" not in s or "=" in s:
1016
+ continue
1017
+ if not type_prefix.search(s):
1018
+ continue
1019
+ m = RE_VLA_DECL.search(s)
1020
+ if not m:
1021
+ continue
1022
+ length_expr = m.group(1).strip()
1023
+ if re.fullmatch(r"\d+\s*", length_expr):
1024
+ continue
1025
+ issues.append(
1026
+ Issue(
1027
+ language="c/cpp",
1028
+ category="memory_mgmt",
1029
+ pattern="vla_usage",
1030
+ file=relpath,
1031
+ line=idx,
1032
+ evidence=_strip_line(s),
1033
+ description="检测到可变长度数组(VLA),在栈上进行不定大小分配,可能导致栈溢出/不可控内存使用。",
1034
+ suggestion="避免 VLA;改用堆分配并进行上界校验,或使用固定上界的静态分配。",
1035
+ confidence=0.6,
1036
+ severity="medium",
1037
+ )
1038
+ )
1039
+ return issues
1040
+
1041
+
1042
+ def _rule_pthread_returns_unchecked(lines: Sequence[str], relpath: str) -> List[Issue]:
1043
+ """
1044
+ 检测 pthread 常见接口的返回值未检查的情形(同/后一两行缺少 if/比较判断)。
1045
+ """
1046
+ issues: List[Issue] = []
1047
+ for idx, s in enumerate(lines, start=1):
1048
+ if not RE_PTHREAD_RET.search(s):
1049
+ continue
1050
+ nearby = " ".join(_safe_line(lines, i) for i in range(idx, min(idx + 2, len(lines)) + 1))
1051
+ if not re.search(r"\bif\s*\(|>=|<=|==|!=|<|>", nearby):
1052
+ issues.append(
1053
+ Issue(
1054
+ language="c/cpp",
1055
+ category="error_handling",
1056
+ pattern="pthread_ret_unchecked",
1057
+ file=relpath,
1058
+ line=idx,
1059
+ evidence=_strip_line(s),
1060
+ description="pthread 接口返回值可能未检查,错误处理缺失可能导致死锁/资源泄漏。",
1061
+ suggestion="检查 pthread 接口返回码并进行错误路径处理;必要时记录日志与清理资源。",
1062
+ confidence=0.6,
1063
+ severity="medium",
1064
+ )
1065
+ )
1066
+ return issues
1067
+
1068
+
1069
+ def _rule_cond_wait_no_loop(lines: Sequence[str], relpath: str) -> List[Issue]:
1070
+ """
1071
+ 检测 pthread_cond_wait 未在 while 循环中使用(防止虚假唤醒)。
1072
+ """
1073
+ issues: List[Issue] = []
1074
+ for idx, s in enumerate(lines, start=1):
1075
+ if not RE_PTHREAD_COND_WAIT.search(s):
1076
+ continue
1077
+ # 回看 2 行内是否有 while( ... )
1078
+ prev_text = " ".join(_safe_line(lines, j) for j in range(max(1, idx - 2), idx))
1079
+ if not re.search(r"\bwhile\s*\(", prev_text):
1080
+ issues.append(
1081
+ Issue(
1082
+ language="c/cpp",
1083
+ category="thread_safety",
1084
+ pattern="cond_wait_no_loop",
1085
+ file=relpath,
1086
+ line=idx,
1087
+ evidence=_strip_line(s),
1088
+ description="pthread_cond_wait 建议置于条件谓词的 while 循环中,以防止虚假唤醒。",
1089
+ suggestion="使用 while(predicate_not_satisfied) 包裹 pthread_cond_wait 调用并在唤醒后重新检查条件。",
1090
+ confidence=0.6,
1091
+ severity="medium",
1092
+ )
1093
+ )
1094
+ return issues
1095
+
1096
+
1097
+ def _rule_thread_leak_no_join(lines: Sequence[str], relpath: str) -> List[Issue]:
1098
+ """
1099
+ 检测创建线程后未 join/detach 的可能线程泄漏。
1100
+ """
1101
+ issues: List[Issue] = []
1102
+ for idx, s in enumerate(lines, start=1):
1103
+ m = RE_PTHREAD_CREATE.search(s)
1104
+ if not m:
1105
+ continue
1106
+ tid = m.group(1)
1107
+ end = min(len(lines), idx + 80)
1108
+ joined_or_detached = False
1109
+ for j in range(idx + 1, end + 1):
1110
+ sj = _safe_line(lines, j)
1111
+ if RE_PTHREAD_JOIN.search(sj) and RE_PTHREAD_JOIN.search(sj).group(1) == tid:
1112
+ joined_or_detached = True
1113
+ break
1114
+ if RE_PTHREAD_DETACH.search(sj) and RE_PTHREAD_DETACH.search(sj).group(1) == tid:
1115
+ joined_or_detached = True
1116
+ break
1117
+ if not joined_or_detached:
1118
+ issues.append(
1119
+ Issue(
1120
+ language="c/cpp",
1121
+ category="resource_leak",
1122
+ pattern="thread_leak_no_join",
1123
+ file=relpath,
1124
+ line=idx,
1125
+ evidence=_strip_line(s),
1126
+ description=f"pthread_create 创建线程 {tid} 后的邻近窗口内未检测到 join/detach,可能导致线程泄漏或资源占用。",
1127
+ suggestion="确保创建的线程被显式 join 或 detach;遵循统一的线程生命周期管理策略。",
1128
+ confidence=0.6,
1129
+ severity="medium",
1130
+ )
1131
+ )
1132
+ return issues
1133
+
1134
+
1135
+ def _rule_inet_legacy(lines: Sequence[str], relpath: str) -> List[Issue]:
1136
+ """
1137
+ 检测 inet_addr/inet_aton 等旧接口的使用。
1138
+ """
1139
+ issues: List[Issue] = []
1140
+ for idx, s in enumerate(lines, start=1):
1141
+ if RE_INET_LEGACY.search(s):
1142
+ issues.append(
1143
+ Issue(
1144
+ language="c/cpp",
1145
+ category="network_api",
1146
+ pattern="inet_legacy",
1147
+ file=relpath,
1148
+ line=idx,
1149
+ evidence=_strip_line(s),
1150
+ description="使用 inet_addr/inet_aton 等旧接口,错误语义模糊/不一致。",
1151
+ suggestion="使用 inet_pton/inet_ntop 进行地址转换,错误处理更可靠且支持 IPv6。",
1152
+ confidence=0.6,
1153
+ severity="low",
1154
+ )
1155
+ )
1156
+ return issues
1157
+
1158
+
1159
+ def _rule_time_apis_not_threadsafe(lines: Sequence[str], relpath: str) -> List[Issue]:
1160
+ """
1161
+ 检测 asctime/ctime/localtime/gmtime 非线程安全接口(非 *_r)。
1162
+ """
1163
+ issues: List[Issue] = []
1164
+ for idx, s in enumerate(lines, start=1):
1165
+ # 排除 *_r 版本
1166
+ if RE_TIME_UNSAFE.search(s) and not re.search(r"_r\s*\(", s):
1167
+ issues.append(
1168
+ Issue(
1169
+ language="c/cpp",
1170
+ category="thread_safety",
1171
+ pattern="time_api_not_threadsafe",
1172
+ file=relpath,
1173
+ line=idx,
1174
+ evidence=_strip_line(s),
1175
+ description="使用 asctime/ctime/localtime/gmtime 等非重入接口,线程安全性不足。",
1176
+ suggestion="改用 *_r 线程安全版本(如 localtime_r/gmtime_r/ctime_r)。",
1177
+ confidence=0.6,
1178
+ severity="medium",
1179
+ )
1180
+ )
1181
+ return issues
1182
+
1183
+
1184
+ def _rule_getenv_unchecked(lines: Sequence[str], relpath: str) -> List[Issue]:
1185
+ """
1186
+ 检测 getenv 使用(环境变量未校验可能导致配置/路径/命令注入风险)。
1187
+ """
1188
+ issues: List[Issue] = []
1189
+ for idx, s in enumerate(lines, start=1):
1190
+ if RE_GETENV.search(s):
1191
+ issues.append(
1192
+ Issue(
1193
+ language="c/cpp",
1194
+ category="input_validation",
1195
+ pattern="getenv_unchecked",
1196
+ file=relpath,
1197
+ line=idx,
1198
+ evidence=_strip_line(s),
1199
+ description="读取环境变量后未见显式校验,可能被用于构造路径/命令等引入安全风险。",
1200
+ suggestion="对白名单键进行读取;对取值执行格式/长度/字符集校验;避免直接拼接为命令/路径。",
1201
+ confidence=0.55,
1202
+ severity="medium",
1203
+ )
1204
+ )
1205
+ return issues
1206
+
1207
+
1208
def analyze_c_cpp_text(relpath: str, text: str) -> List[Issue]:
    """
    Run every C/C++ heuristic rule over ``text`` and collect the findings.

    The text is split into lines once and each rule is called with those
    lines and ``relpath``. Results are concatenated in rule order.

    Args:
        relpath: Path recorded in each reported issue.
        text: Full source text to analyse.

    Returns:
        All issues produced by the rules, grouped by rule in the order below.
    """
    rule_pipeline = (
        # Baseline rules
        _rule_unsafe_api,
        _rule_boundary_funcs,
        _rule_realloc_assign_back,
        _rule_malloc_no_null_check,
        _rule_uaf_suspect,
        _rule_unchecked_io,
        _rule_strncpy_no_nullterm,
        # Added rules
        _rule_format_string,
        _rule_insecure_tmpfile,
        _rule_command_execution,
        _rule_scanf_no_width,
        _rule_alloc_size_overflow,
        # Other dangerous usages (low false-positive rate preferred)
        _rule_double_free_and_free_non_heap,
        _rule_atoi_family,
        _rule_rand_insecure,
        _rule_strtok_nonreentrant,
        _rule_open_permissive_perms,
        # More dangerous usages (second batch)
        _rule_alloca_unbounded,
        _rule_vla_usage,
        _rule_pthread_returns_unchecked,
        _rule_cond_wait_no_loop,
        _rule_thread_leak_no_join,
        _rule_inet_legacy,
        _rule_time_apis_not_threadsafe,
        _rule_getenv_unchecked,
        # Null pointer / wild pointer / deadlock detection
        _rule_possible_null_deref,
        _rule_uninitialized_ptr_use,
        _rule_deadlock_patterns,
    )
    source_lines = text.splitlines()
    collected: List[Issue] = []
    for rule in rule_pipeline:
        collected.extend(rule(source_lines, relpath))
    return collected
1247
+
1248
+
1249
def analyze_c_cpp_file(base: Path, relpath: Path) -> List[Issue]:
    """
    Read a file from disk and analyse it with ``analyze_c_cpp_text``.

    The file is decoded as UTF-8 with undecodable bytes dropped, so the
    result does not depend on the platform's default locale encoding.

    Args:
        base: Directory that ``relpath`` is resolved against.
        relpath: File path relative to ``base``; its string form is the path
            recorded in the reported issues.

    Returns:
        The issues found, or an empty list when the file cannot be read
        (analysis is best-effort: unreadable files are skipped, not fatal).
    """
    try:
        text = (base / relpath).read_text(encoding="utf-8", errors="ignore")
    except (OSError, ValueError):
        # OSError: missing/unreadable file or a directory;
        # ValueError: malformed path (e.g. an embedded NUL byte).
        return []
    return analyze_c_cpp_text(str(relpath), text)
1258
+
1259
+
1260
def analyze_files(base_path: str, files: Iterable[str]) -> List[Issue]:
    """
    Analyse several files; each entry of ``files`` is relative to ``base_path``.

    Args:
        base_path: Root directory; resolved to an absolute path once.
        files: Relative file paths to analyse.

    Returns:
        The issues from all files, concatenated in the order of ``files``.
    """
    root = Path(base_path).resolve()
    return [
        issue
        for name in files
        for issue in analyze_c_cpp_file(root, Path(name))
    ]