jarvis-ai-assistant 0.1.222__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +1143 -245
- jarvis/jarvis_agent/agent_manager.py +97 -0
- jarvis/jarvis_agent/builtin_input_handler.py +12 -10
- jarvis/jarvis_agent/config_editor.py +57 -0
- jarvis/jarvis_agent/edit_file_handler.py +392 -99
- jarvis/jarvis_agent/event_bus.py +48 -0
- jarvis/jarvis_agent/events.py +157 -0
- jarvis/jarvis_agent/file_context_handler.py +79 -0
- jarvis/jarvis_agent/file_methodology_manager.py +117 -0
- jarvis/jarvis_agent/jarvis.py +1117 -147
- jarvis/jarvis_agent/main.py +78 -34
- jarvis/jarvis_agent/memory_manager.py +195 -0
- jarvis/jarvis_agent/methodology_share_manager.py +174 -0
- jarvis/jarvis_agent/prompt_manager.py +82 -0
- jarvis/jarvis_agent/prompts.py +46 -9
- jarvis/jarvis_agent/protocols.py +4 -1
- jarvis/jarvis_agent/rewrite_file_handler.py +141 -0
- jarvis/jarvis_agent/run_loop.py +146 -0
- jarvis/jarvis_agent/session_manager.py +9 -9
- jarvis/jarvis_agent/share_manager.py +228 -0
- jarvis/jarvis_agent/shell_input_handler.py +23 -3
- jarvis/jarvis_agent/stdio_redirect.py +295 -0
- jarvis/jarvis_agent/task_analyzer.py +212 -0
- jarvis/jarvis_agent/task_manager.py +154 -0
- jarvis/jarvis_agent/task_planner.py +496 -0
- jarvis/jarvis_agent/tool_executor.py +8 -4
- jarvis/jarvis_agent/tool_share_manager.py +139 -0
- jarvis/jarvis_agent/user_interaction.py +42 -0
- jarvis/jarvis_agent/utils.py +54 -0
- jarvis/jarvis_agent/web_bridge.py +189 -0
- jarvis/jarvis_agent/web_output_sink.py +53 -0
- jarvis/jarvis_agent/web_server.py +751 -0
- jarvis/jarvis_c2rust/__init__.py +26 -0
- jarvis/jarvis_c2rust/cli.py +613 -0
- jarvis/jarvis_c2rust/collector.py +258 -0
- jarvis/jarvis_c2rust/library_replacer.py +1122 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +1300 -0
- jarvis/jarvis_c2rust/optimizer.py +960 -0
- jarvis/jarvis_c2rust/scanner.py +1681 -0
- jarvis/jarvis_c2rust/transpiler.py +2325 -0
- jarvis/jarvis_code_agent/build_validation_config.py +133 -0
- jarvis/jarvis_code_agent/code_agent.py +1605 -178
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +62 -0
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +102 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +59 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +69 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +38 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +50 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +93 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +129 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +54 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +154 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +363 -0
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +89 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +31 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +231 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +183 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +219 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +209 -0
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +451 -0
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +77 -0
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +48 -0
- jarvis/jarvis_code_agent/lint.py +275 -13
- jarvis/jarvis_code_agent/utils.py +142 -0
- jarvis/jarvis_code_analysis/checklists/loader.py +20 -6
- jarvis/jarvis_code_analysis/code_review.py +583 -548
- jarvis/jarvis_data/config_schema.json +339 -28
- jarvis/jarvis_git_squash/main.py +22 -13
- jarvis/jarvis_git_utils/git_commiter.py +171 -55
- jarvis/jarvis_mcp/sse_mcp_client.py +22 -15
- jarvis/jarvis_mcp/stdio_mcp_client.py +4 -4
- jarvis/jarvis_mcp/streamable_mcp_client.py +36 -16
- jarvis/jarvis_memory_organizer/memory_organizer.py +753 -0
- jarvis/jarvis_methodology/main.py +48 -63
- jarvis/jarvis_multi_agent/__init__.py +302 -43
- jarvis/jarvis_multi_agent/main.py +70 -24
- jarvis/jarvis_platform/ai8.py +40 -23
- jarvis/jarvis_platform/base.py +210 -49
- jarvis/jarvis_platform/human.py +11 -1
- jarvis/jarvis_platform/kimi.py +82 -76
- jarvis/jarvis_platform/openai.py +73 -1
- jarvis/jarvis_platform/registry.py +8 -15
- jarvis/jarvis_platform/tongyi.py +115 -101
- jarvis/jarvis_platform/yuanbao.py +89 -63
- jarvis/jarvis_platform_manager/main.py +194 -132
- jarvis/jarvis_platform_manager/service.py +122 -86
- jarvis/jarvis_rag/cli.py +156 -53
- jarvis/jarvis_rag/embedding_manager.py +155 -12
- jarvis/jarvis_rag/llm_interface.py +10 -13
- jarvis/jarvis_rag/query_rewriter.py +63 -12
- jarvis/jarvis_rag/rag_pipeline.py +222 -40
- jarvis/jarvis_rag/reranker.py +26 -3
- jarvis/jarvis_rag/retriever.py +270 -14
- jarvis/jarvis_sec/__init__.py +3605 -0
- jarvis/jarvis_sec/checkers/__init__.py +32 -0
- jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
- jarvis/jarvis_sec/cli.py +116 -0
- jarvis/jarvis_sec/report.py +257 -0
- jarvis/jarvis_sec/status.py +264 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +219 -0
- jarvis/jarvis_smart_shell/main.py +405 -137
- jarvis/jarvis_stats/__init__.py +13 -0
- jarvis/jarvis_stats/cli.py +387 -0
- jarvis/jarvis_stats/stats.py +711 -0
- jarvis/jarvis_stats/storage.py +612 -0
- jarvis/jarvis_stats/visualizer.py +282 -0
- jarvis/jarvis_tools/ask_user.py +1 -0
- jarvis/jarvis_tools/base.py +18 -2
- jarvis/jarvis_tools/clear_memory.py +239 -0
- jarvis/jarvis_tools/cli/main.py +220 -144
- jarvis/jarvis_tools/execute_script.py +52 -12
- jarvis/jarvis_tools/file_analyzer.py +17 -12
- jarvis/jarvis_tools/generate_new_tool.py +46 -24
- jarvis/jarvis_tools/read_code.py +277 -18
- jarvis/jarvis_tools/read_symbols.py +141 -0
- jarvis/jarvis_tools/read_webpage.py +86 -13
- jarvis/jarvis_tools/registry.py +294 -90
- jarvis/jarvis_tools/retrieve_memory.py +227 -0
- jarvis/jarvis_tools/save_memory.py +194 -0
- jarvis/jarvis_tools/search_web.py +62 -28
- jarvis/jarvis_tools/sub_agent.py +205 -0
- jarvis/jarvis_tools/sub_code_agent.py +217 -0
- jarvis/jarvis_tools/virtual_tty.py +330 -62
- jarvis/jarvis_utils/builtin_replace_map.py +4 -5
- jarvis/jarvis_utils/clipboard.py +90 -0
- jarvis/jarvis_utils/config.py +607 -50
- jarvis/jarvis_utils/embedding.py +3 -0
- jarvis/jarvis_utils/fzf.py +57 -0
- jarvis/jarvis_utils/git_utils.py +251 -29
- jarvis/jarvis_utils/globals.py +174 -17
- jarvis/jarvis_utils/http.py +58 -79
- jarvis/jarvis_utils/input.py +899 -153
- jarvis/jarvis_utils/methodology.py +210 -83
- jarvis/jarvis_utils/output.py +220 -137
- jarvis/jarvis_utils/utils.py +1906 -135
- jarvis_ai_assistant-0.7.0.dist-info/METADATA +465 -0
- jarvis_ai_assistant-0.7.0.dist-info/RECORD +192 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/entry_points.txt +8 -2
- jarvis/jarvis_git_details/main.py +0 -265
- jarvis/jarvis_platform/oyi.py +0 -357
- jarvis/jarvis_tools/edit_file.py +0 -255
- jarvis/jarvis_tools/rewrite_file.py +0 -195
- jarvis_ai_assistant-0.1.222.dist-info/METADATA +0 -767
- jarvis_ai_assistant-0.1.222.dist-info/RECORD +0 -110
- /jarvis/{jarvis_git_details → jarvis_memory_organizer}/__init__.py +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.222.dist-info → jarvis_ai_assistant-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,2680 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Jarvis 安全分析套件 —— C/C++ 启发式安全检查器
|
|
4
|
+
|
|
5
|
+
目标与范围:
|
|
6
|
+
- 聚焦内存管理、缓冲区操作、错误处理三类基础安全问题,提供可解释的启发式检测与置信度评估。
|
|
7
|
+
- 面向 C/C++ 与头文件(.c/.cpp/.h/.hpp)。
|
|
8
|
+
|
|
9
|
+
输出约定:
|
|
10
|
+
- 返回 jarvis.jarvis_sec.workflow.Issue 列表(保持结构化,便于聚合评分与报告生成)。
|
|
11
|
+
- 置信度区间 [0,1],基于命中规则与上下文线索加权计算;严重性(severity)分为 high/medium/low。
|
|
12
|
+
|
|
13
|
+
使用方式(示例):
|
|
14
|
+
- from jarvis.jarvis_sec.checkers.c_checker import analyze_files
|
|
15
|
+
- issues = analyze_files("./repo", ["src/a.c", "include/a.h"])
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Iterable, List, Optional, Sequence, Tuple
|
|
23
|
+
|
|
24
|
+
from jarvis.jarvis_sec.types import Issue
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ---------------------------
# Rule library (regular expressions)
# ---------------------------

# Unbounded string APIs (captures the function name).
RE_UNSAFE_API = re.compile(
    r"\b(strcpy|strcat|gets|sprintf|vsprintf)\s*\(",
    re.IGNORECASE,
)
# Copy routines that take an explicit length (captures the function name).
RE_BOUNDARY_FUNCS = re.compile(
    r"\b(memcpy|memmove|strncpy|strncat)\s*\(",
    re.IGNORECASE,
)
# Allocation / release keywords and functions.
RE_MEM_MGMT = re.compile(
    r"\b(malloc|calloc|realloc|free|new\s+|delete\b)",
    re.IGNORECASE,
)
# stdio / POSIX file I/O calls.
RE_IO_API = re.compile(
    r"\b(fopen|fclose|fread|fwrite|read|write|open|close)\s*\(",
    re.IGNORECASE,
)

# Added: format-string, insecure temp-file and command-execution API patterns.
RE_PRINTF_LIKE = re.compile(
    r"\b(printf|sprintf|snprintf|vsprintf|vsnprintf)\s*\(",
    re.IGNORECASE,
)
RE_FPRINTF = re.compile(r"\bfprintf\s*\(", re.IGNORECASE)
RE_INSECURE_TMP = re.compile(r"\b(tmpnam|tempnam|mktemp)\s*\(", re.IGNORECASE)
RE_SYSTEM_LIKE = re.compile(r"\b(system|popen)\s*\(", re.IGNORECASE)
RE_EXEC_LIKE = re.compile(
    r"\b(execvp|execlp|execvpe|execl|execve|execv)\s*\(",
    re.IGNORECASE,
)
# scanf family whose first argument is a string literal (captures the format).
RE_SCANF_CALL = re.compile(r'\b(?:[fs]?scanf)\s*\(\s*"([^"]*)"', re.IGNORECASE)

# Threads / locks (capture the mutex identifier passed as ``&name``).
RE_PTHREAD_LOCK = re.compile(
    r"\bpthread_mutex_lock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?",
    re.IGNORECASE,
)
RE_PTHREAD_UNLOCK = re.compile(
    r"\bpthread_mutex_unlock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?",
    re.IGNORECASE,
)

# Other risky usages.
RE_ATOI_FAMILY = re.compile(r"\b(atoi|atol|atoll|atof)\s*\(", re.IGNORECASE)
RE_RAND = re.compile(r"\b(rand|srand)\s*\(", re.IGNORECASE)
RE_STRTOK = re.compile(r"\bstrtok\s*\(", re.IGNORECASE)
# open(..., O_CREAT ..., 0NNN): captures the octal mode literal.
RE_OPEN_PERMISSIVE = re.compile(
    r"\bopen\s*\(\s*[^,]+,\s*[^,]*O_CREAT[^,]*,\s*(0[0-7]{3,4})\s*\)",
    re.IGNORECASE,
)
# fopen(path, "mode"): captures the mode string.
RE_FOPEN_MODE = re.compile(r'\bfopen\s*\(\s*[^,]+,\s*"([^"]+)"\s*\)', re.IGNORECASE)
RE_GENERIC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*")
# free(<expr>): captures the (non-greedy) argument expression.
RE_FREE_CALL_ANY = re.compile(r"\bfree\s*\(\s*([^)]+?)\s*\)", re.IGNORECASE)
|
|
66
|
+
# Extended: additional risky usages.
# alloca(<expr>): captures the size expression.
RE_ALLOCA = re.compile(r"\balloca\s*\(\s*([^)]+)\s*\)", re.IGNORECASE)
# "<type> name[<expr>];" array declarations: captures the bracketed size expression.
RE_VLA_DECL = re.compile(
    r"\b(?:const\s+|volatile\s+|static\s+|register\s+|unsigned\s+|signed\s+)?[A-Za-z_]\w*(?:\s+\*|\s+)+[A-Za-z_]\w*\s*\[\s*([^\]]+)\s*\]\s*;",
    re.IGNORECASE,
)
# pthread calls (captures the function name).
RE_PTHREAD_RET = re.compile(
    r"\b(pthread_(?:mutex_(?:lock|trylock|timedlock)|create|cond_(?:wait|timedwait)|join|detach))\s*\(",
    re.IGNORECASE,
)
RE_PTHREAD_COND_WAIT = re.compile(
    r"\bpthread_cond_(?:timed)?wait\s*\(",
    re.IGNORECASE,
)
RE_PTHREAD_CREATE = re.compile(
    r"\bpthread_create\s*\(\s*&\s*([A-Za-z_]\w*)\s*,",
    re.IGNORECASE,
)
RE_PTHREAD_JOIN = re.compile(
    r"\bpthread_join\s*\(\s*([A-Za-z_]\w*)\s*,",
    re.IGNORECASE,
)
RE_PTHREAD_DETACH = re.compile(
    r"\bpthread_detach\s*\(\s*([A-Za-z_]\w*)\s*\)",
    re.IGNORECASE,
)

# C++ standard-library locks.
RE_STD_MUTEX = re.compile(r"\b(?:std::)?mutex\s+([A-Za-z_]\w*)", re.IGNORECASE)
RE_MUTEX_LOCK = re.compile(r"\b([A-Za-z_]\w*)\s*\.lock\s*\(", re.IGNORECASE)
RE_MUTEX_UNLOCK = re.compile(r"\b([A-Za-z_]\w*)\s*\.unlock\s*\(", re.IGNORECASE)
RE_MUTEX_TRY_LOCK = re.compile(r"\b([A-Za-z_]\w*)\s*\.try_lock\s*\(", re.IGNORECASE)
RE_LOCK_GUARD = re.compile(
    r"\b(?:std::)?lock_guard\s*<[^>]+>\s*([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_UNIQUE_LOCK = re.compile(
    r"\b(?:std::)?unique_lock\s*<[^>]+>\s*([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_SHARED_LOCK = re.compile(
    r"\b(?:std::)?shared_lock\s*<[^>]+>\s*([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_STD_LOCK = re.compile(r"\bstd::lock\s*\(", re.IGNORECASE)
RE_SCOPED_LOCK = re.compile(r"\b(?:std::)?scoped_lock\s*<", re.IGNORECASE)

# Data-race detection helpers.
RE_STATIC_VAR = re.compile(
    r"\bstatic\s+(?:const\s+|volatile\s+)?[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_EXTERN_VAR = re.compile(
    r"\bextern\s+[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_STD_THREAD = re.compile(r"\b(?:std::)?thread\s+([A-Za-z_]\w*)", re.IGNORECASE)
RE_ATOMIC = re.compile(
    r"\b(?:std::)?atomic\s*<[^>]+>\s*([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_VOLATILE = re.compile(
    r"\bvolatile\s+[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_VAR_ACCESS = re.compile(r"\b([A-Za-z_]\w*)\s*(?:=|\[|->|\.)", re.IGNORECASE)
RE_VAR_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=", re.IGNORECASE)
RE_INET_LEGACY = re.compile(r"\b(inet_addr|inet_aton)\s*\(", re.IGNORECASE)
RE_TIME_UNSAFE = re.compile(
    r"\b(asctime|ctime|localtime|gmtime)\s*\(",
    re.IGNORECASE,
)
# getenv() called with a string-literal name.
RE_GETENV = re.compile(r'\bgetenv\s*\(\s*"[^"]*"\s*\)', re.IGNORECASE)
|
|
101
|
+
|
|
102
|
+
# Helper regexes.
# "p = realloc(p, ...": the same identifier on both sides (captures it).
RE_REALLOC_ASSIGN_BACK = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*realloc\s*\(\s*\1\s*,",
    re.IGNORECASE,
)
RE_MALLOC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*malloc\s*\(", re.IGNORECASE)
RE_CALLOC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*calloc\s*\(", re.IGNORECASE)
RE_NEW_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*new\b", re.IGNORECASE)
RE_DEREF = re.compile(r"(\*|->)\s*[A-Za-z_]\w*|\b[A-Za-z_]\w*\s*\[", re.IGNORECASE)
RE_NULL_CHECK = re.compile(
    r"\bif\s*\(\s*(!\s*)?[A-Za-z_]\w*\s*(==|!=)\s*NULL\s*\)|\bif\s*\(\s*[A-Za-z_]\w*\s*\)",
    re.IGNORECASE,
)
RE_FREE_VAR = re.compile(
    r"free\s*\(\s*([A-Za-z_]\w*)\s*\)\s*;",
    re.IGNORECASE,
)
RE_USE_VAR = re.compile(r"\b([A-Za-z_]\w*)\b")
RE_STRLEN_IN_SIZE = re.compile(r"\bstrlen\s*\(", re.IGNORECASE)
# sizeof(*name)
RE_SIZEOF_PTR = re.compile(
    r"\bsizeof\s*\(\s*\*\s*[A-Za-z_]\w*\s*\)",
    re.IGNORECASE,
)
RE_STRNCPY = re.compile(r"\bstrncpy\s*\(", re.IGNORECASE)
RE_STRNCAT = re.compile(r"\bstrncat\s*\(", re.IGNORECASE)

# C++-specific patterns.
RE_SHARED_PTR = re.compile(r"\b(?:std::)?shared_ptr\s*<", re.IGNORECASE)
RE_UNIQUE_PTR = re.compile(r"\b(?:std::)?unique_ptr\s*<", re.IGNORECASE)
RE_WEAK_PTR = re.compile(r"\b(?:std::)?weak_ptr\s*<", re.IGNORECASE)
RE_SMART_PTR_ASSIGN = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*(?:std::)?(?:shared_ptr|unique_ptr|weak_ptr)\s*<",
    re.IGNORECASE,
)
RE_NEW_ARRAY = re.compile(r"\bnew\s+[A-Za-z_]\w*\s*\[", re.IGNORECASE)
RE_DELETE_ARRAY = re.compile(r"\bdelete\s*\[\s*\]", re.IGNORECASE)
# "delete " not followed by "[" (i.e. scalar delete).
RE_DELETE = re.compile(r"\bdelete\s+(?!\[)", re.IGNORECASE)
RE_STATIC_CAST = re.compile(r"\bstatic_cast\s*<", re.IGNORECASE)
RE_DYNAMIC_CAST = re.compile(r"\bdynamic_cast\s*<", re.IGNORECASE)
RE_REINTERPRET_CAST = re.compile(r"\breinterpret_cast\s*<", re.IGNORECASE)
RE_CONST_CAST = re.compile(r"\bconst_cast\s*<", re.IGNORECASE)
RE_VECTOR_ACCESS = re.compile(
    r"\b(?:std::)?vector\s*<[^>]+>\s*[A-Za-z_]\w*\s*\[",
    re.IGNORECASE,
)
RE_STRING_ACCESS = re.compile(
    r"\b(?:std::)?(?:string|wstring)\s*[A-Za-z_]\w*\s*\[",
    re.IGNORECASE,
)
RE_VECTOR_VAR = re.compile(
    r"\b(?:std::)?vector\s*<[^>]+>\s*([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_STRING_VAR = re.compile(
    r"\b(?:std::)?(?:string|wstring)\s+([A-Za-z_]\w*)",
    re.IGNORECASE,
)
RE_AT_METHOD = re.compile(r"\.at\s*\(", re.IGNORECASE)
RE_VIRTUAL_DTOR = re.compile(r"\bvirtual\s+~[A-Za-z_]\w*\s*\(", re.IGNORECASE)
RE_CLASS_DECL = re.compile(r"\bclass\s+([A-Za-z_]\w*)", re.IGNORECASE)
RE_DTOR_DECL = re.compile(r"~\s*([A-Za-z_]\w*)\s*\(", re.IGNORECASE)
RE_MOVE = re.compile(r"\bstd::move\s*\(", re.IGNORECASE)
RE_MOVE_ASSIGN = re.compile(
    r"\b([A-Za-z_]\w*)\s*=\s*std::move\s*\(",
    re.IGNORECASE,
)
RE_THROW = re.compile(r"\bthrow\s+", re.IGNORECASE)
RE_TRY = re.compile(r"\btry\s*\{", re.IGNORECASE)
RE_CATCH = re.compile(r"\bcatch\s*\(", re.IGNORECASE)
RE_NOEXCEPT = re.compile(r"\bnoexcept\s*(?:\([^)]*\))?", re.IGNORECASE)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ---------------------------
|
|
157
|
+
# 公共工具
|
|
158
|
+
# ---------------------------
|
|
159
|
+
|
|
160
|
+
def _safe_line(lines: Sequence[str], idx: int) -> str:
|
|
161
|
+
if 1 <= idx <= len(lines):
|
|
162
|
+
return lines[idx - 1]
|
|
163
|
+
return ""
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _strip_line(s: str, max_len: int = 200) -> str:
|
|
167
|
+
s = s.strip().replace("\t", " ")
|
|
168
|
+
return s if len(s) <= max_len else s[: max_len - 3] + "..."
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _window(lines: Sequence[str], center: int, before: int = 3, after: int = 3) -> List[Tuple[int, str]]:
|
|
172
|
+
start = max(1, center - before)
|
|
173
|
+
end = min(len(lines), center + after)
|
|
174
|
+
return [(i, _safe_line(lines, i)) for i in range(start, end + 1)]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _remove_comments_preserve_strings(text: str) -> str:
|
|
178
|
+
"""
|
|
179
|
+
移除 C/C++ 源码中的注释(// 与 /* */),保留字符串与字符字面量内容;
|
|
180
|
+
为了保持行号与窗口定位稳定,注释内容会被空格替换并保留换行符。
|
|
181
|
+
说明:本函数为启发式实现,旨在降低“注释中的API命中”造成的误报。
|
|
182
|
+
"""
|
|
183
|
+
res: list[str] = []
|
|
184
|
+
i = 0
|
|
185
|
+
n = len(text)
|
|
186
|
+
in_sl_comment = False # //
|
|
187
|
+
in_bl_comment = False # /* */
|
|
188
|
+
in_string = False # "
|
|
189
|
+
in_char = False # '
|
|
190
|
+
escape = False
|
|
191
|
+
|
|
192
|
+
while i < n:
|
|
193
|
+
ch = text[i]
|
|
194
|
+
nxt = text[i + 1] if i + 1 < n else ""
|
|
195
|
+
|
|
196
|
+
if in_sl_comment:
|
|
197
|
+
# 单行注释直到换行结束
|
|
198
|
+
if ch == "\n":
|
|
199
|
+
in_sl_comment = False
|
|
200
|
+
res.append(ch)
|
|
201
|
+
else:
|
|
202
|
+
# 用空格占位,保持列数
|
|
203
|
+
res.append(" ")
|
|
204
|
+
i += 1
|
|
205
|
+
continue
|
|
206
|
+
|
|
207
|
+
if in_bl_comment:
|
|
208
|
+
# 多行注释直到 */
|
|
209
|
+
if ch == "*" and nxt == "/":
|
|
210
|
+
in_bl_comment = False
|
|
211
|
+
res.append(" ")
|
|
212
|
+
res.append(" ")
|
|
213
|
+
i += 2
|
|
214
|
+
else:
|
|
215
|
+
# 注释体内保留换行,其余替换为空格
|
|
216
|
+
res.append("\n" if ch == "\n" else " ")
|
|
217
|
+
i += 1
|
|
218
|
+
continue
|
|
219
|
+
|
|
220
|
+
# 非注释态下,处理字符串与字符字面量
|
|
221
|
+
if in_string:
|
|
222
|
+
res.append(ch)
|
|
223
|
+
if escape:
|
|
224
|
+
escape = False
|
|
225
|
+
elif ch == "\\":
|
|
226
|
+
escape = True
|
|
227
|
+
elif ch == '"':
|
|
228
|
+
in_string = False
|
|
229
|
+
i += 1
|
|
230
|
+
continue
|
|
231
|
+
|
|
232
|
+
if in_char:
|
|
233
|
+
res.append(ch)
|
|
234
|
+
if escape:
|
|
235
|
+
escape = False
|
|
236
|
+
elif ch == "\\":
|
|
237
|
+
escape = True
|
|
238
|
+
elif ch == "'":
|
|
239
|
+
in_char = False
|
|
240
|
+
i += 1
|
|
241
|
+
continue
|
|
242
|
+
|
|
243
|
+
# 进入注释判定(需不在字符串/字符字面量中)
|
|
244
|
+
if ch == "/" and nxt == "/":
|
|
245
|
+
in_sl_comment = True
|
|
246
|
+
# 保留两个占位,避免拼接
|
|
247
|
+
res.append(" ")
|
|
248
|
+
res.append(" ")
|
|
249
|
+
i += 2
|
|
250
|
+
continue
|
|
251
|
+
if ch == "/" and nxt == "*":
|
|
252
|
+
in_bl_comment = True
|
|
253
|
+
res.append(" ")
|
|
254
|
+
res.append(" ")
|
|
255
|
+
i += 2
|
|
256
|
+
continue
|
|
257
|
+
|
|
258
|
+
# 进入字符串/字符字面量
|
|
259
|
+
if ch == '"':
|
|
260
|
+
in_string = True
|
|
261
|
+
res.append(ch)
|
|
262
|
+
i += 1
|
|
263
|
+
continue
|
|
264
|
+
if ch == "'":
|
|
265
|
+
in_char = True
|
|
266
|
+
res.append(ch)
|
|
267
|
+
i += 1
|
|
268
|
+
continue
|
|
269
|
+
|
|
270
|
+
# 普通字符
|
|
271
|
+
res.append(ch)
|
|
272
|
+
i += 1
|
|
273
|
+
|
|
274
|
+
return "".join(res)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _mask_strings_preserve_len(text: str) -> str:
|
|
278
|
+
"""
|
|
279
|
+
将字符串与字符字面量内部内容替换为空格,保留引号与换行,保持长度与行号不变。
|
|
280
|
+
用于在扫描通用 API 模式时避免误将字符串中的片段(如 "system(")当作代码。
|
|
281
|
+
注意:此函数不移除注释,请在已移除注释的文本上调用。
|
|
282
|
+
"""
|
|
283
|
+
res: list[str] = []
|
|
284
|
+
in_string = False
|
|
285
|
+
in_char = False
|
|
286
|
+
escape = False
|
|
287
|
+
for ch in text:
|
|
288
|
+
if in_string:
|
|
289
|
+
if escape:
|
|
290
|
+
# 保留转义反斜杠为两字符(反斜杠+空格),以不破坏列对齐过多
|
|
291
|
+
res.append(" ")
|
|
292
|
+
escape = False
|
|
293
|
+
elif ch == "\\":
|
|
294
|
+
res.append("\\")
|
|
295
|
+
escape = True
|
|
296
|
+
elif ch == '"':
|
|
297
|
+
res.append('"')
|
|
298
|
+
in_string = False
|
|
299
|
+
elif ch == "\n":
|
|
300
|
+
res.append("\n")
|
|
301
|
+
else:
|
|
302
|
+
res.append(" ")
|
|
303
|
+
continue
|
|
304
|
+
if in_char:
|
|
305
|
+
if escape:
|
|
306
|
+
res.append(" ")
|
|
307
|
+
escape = False
|
|
308
|
+
elif ch == "\\":
|
|
309
|
+
res.append("\\")
|
|
310
|
+
escape = True
|
|
311
|
+
elif ch == "'":
|
|
312
|
+
res.append("'")
|
|
313
|
+
in_char = False
|
|
314
|
+
elif ch == "\n":
|
|
315
|
+
res.append("\n")
|
|
316
|
+
else:
|
|
317
|
+
res.append(" ")
|
|
318
|
+
continue
|
|
319
|
+
if ch == '"':
|
|
320
|
+
in_string = True
|
|
321
|
+
res.append('"')
|
|
322
|
+
continue
|
|
323
|
+
if ch == "'":
|
|
324
|
+
in_char = True
|
|
325
|
+
res.append("'")
|
|
326
|
+
continue
|
|
327
|
+
res.append(ch)
|
|
328
|
+
return "".join(res)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _strip_if0_blocks(text: str) -> str:
|
|
332
|
+
"""
|
|
333
|
+
预处理常见的 #if 0 … #else … #endif 结构:
|
|
334
|
+
- 跳过 #if 0 的主体;若存在 #else,则保留 #else 分支
|
|
335
|
+
- 保留行数与换行,确保行号稳定
|
|
336
|
+
限制:仅识别常量 0 的条件,不对复杂表达式求值;#elif 未处理
|
|
337
|
+
"""
|
|
338
|
+
lines = text.splitlines(keepends=True)
|
|
339
|
+
out: list[str] = []
|
|
340
|
+
stack: list[dict] = [] # 每帧:{"kind": "if0"|"if", "skipping": bool, "in_else": bool}
|
|
341
|
+
|
|
342
|
+
def any_skipping() -> bool:
|
|
343
|
+
return any(frame.get("skipping", False) for frame in stack)
|
|
344
|
+
|
|
345
|
+
for line in lines:
|
|
346
|
+
if re.match(r"^\s*#\s*if\s+0\b", line):
|
|
347
|
+
# 进入 #if 0:主体跳过
|
|
348
|
+
stack.append({"kind": "if0", "skipping": True, "in_else": False})
|
|
349
|
+
out.append("\n" if line.endswith("\n") else "")
|
|
350
|
+
continue
|
|
351
|
+
if re.match(r"^\s*#\s*if\b", line):
|
|
352
|
+
# 其他 #if:不求值,仅记录,继承外层 skipping
|
|
353
|
+
stack.append({"kind": "if", "skipping": any_skipping(), "in_else": False})
|
|
354
|
+
out.append(line if not any_skipping() else ("\n" if line.endswith("\n") else ""))
|
|
355
|
+
continue
|
|
356
|
+
if re.match(r"^\s*#\s*else\b", line):
|
|
357
|
+
if stack:
|
|
358
|
+
top = stack[-1]
|
|
359
|
+
if top["kind"] == "if0":
|
|
360
|
+
# #if 0 的 else:翻转 skipping,使 else 分支有效
|
|
361
|
+
top["skipping"] = not top["skipping"]
|
|
362
|
+
top["in_else"] = True
|
|
363
|
+
out.append(line if not any_skipping() else ("\n" if line.endswith("\n") else ""))
|
|
364
|
+
continue
|
|
365
|
+
if re.match(r"^\s*#\s*endif\b", line):
|
|
366
|
+
if stack:
|
|
367
|
+
stack.pop()
|
|
368
|
+
out.append(line if not any_skipping() else ("\n" if line.endswith("\n") else ""))
|
|
369
|
+
continue
|
|
370
|
+
# 常规代码
|
|
371
|
+
if any_skipping():
|
|
372
|
+
out.append("\n" if line.endswith("\n") else "")
|
|
373
|
+
else:
|
|
374
|
+
out.append(line)
|
|
375
|
+
return "".join(out)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def _has_null_check_around(var: str, lines: Sequence[str], line_no: int, radius: int = 5) -> bool:
    """Return True when a null check on ``var`` appears near ``line_no``.

    The ``radius`` lines before and after ``line_no`` (1-based) are searched
    for any of these forms, which helps reduce false positives:

    - ``if (ptr)`` / ``if (!ptr)``
    - ``if (ptr == NULL)`` / ``if (NULL != ptr)``, with ``NULL``, ``nullptr``
      or ``0`` on either side
    - assertion-style macros: ``assert``, ``BUG_ON``, ``WARN_ON``, ``CHECK``,
      ``ENSURE`` applied to ``ptr``, ``!ptr`` or a comparison with null

    Changes over the previous version: C++ ``nullptr`` is recognised, and the
    pattern is compiled once per call instead of once per scanned line.
    """
    name = re.escape(var)
    null = r"(?:NULL|nullptr|0)"
    check = re.compile(
        # Plain truthiness test.
        rf"\bif\s*\(\s*(?:!\s*)?{name}\s*\)"
        # Explicit comparison, either operand order.
        rf"|\bif\s*\(\s*{name}\s*(?:==|!=)\s*{null}\s*\)"
        rf"|\bif\s*\(\s*{null}\s*(?:==|!=)\s*{name}\s*\)"
        # Assertion / check macros.
        rf"|\b(?:assert|BUG_ON|WARN_ON|CHECK|ENSURE)\s*\(\s*(?:!\s*)?{name}"
        rf"(?:\s*(?:==|!=)\s*{null})?\s*\)"
    )
    return any(
        check.search(text)
        for _, text in _window(lines, line_no, before=radius, after=radius)
    )
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _has_len_bound_around(lines: Sequence[str], line_no: int, radius: int = 3) -> bool:
    """Return True when a length-bound hint appears near ``line_no``.

    This is a very coarse substring check over the ``radius`` lines before and
    after ``line_no``: it looks for size expressions, size constants, clamping
    helpers or bounded string APIs.
    """
    hints = (
        "sizeof(",
        "BUFFER_SIZE",
        "MAX_",
        "min(",
        "clamp(",
        "snprintf",
        "strlcpy",
        "strlcat",
    )
    for _, text in _window(lines, line_no, before=radius, after=radius):
        for hint in hints:
            if hint in text:
                return True
    return False
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def _severity_from_confidence(conf: float, base: str) -> str:
|
|
414
|
+
# 基于基类目提供缺省严重度调整
|
|
415
|
+
if conf >= 0.8:
|
|
416
|
+
return "high"
|
|
417
|
+
if conf >= 0.6:
|
|
418
|
+
return "medium"
|
|
419
|
+
return "low"
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# ---------------------------
|
|
423
|
+
# 具体验证规则
|
|
424
|
+
# ---------------------------
|
|
425
|
+
|
|
426
|
+
def _rule_unsafe_api(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report calls to the unbounded string APIs matched by ``RE_UNSAFE_API``.

    At most one issue is produced per line (the first match). Preprocessor
    lines and lines containing ``typedef`` / ``extern`` are skipped, as are
    header-file lines that look like a prototype (a type keyword before the
    match and a trailing ``);``). Confidence starts at 0.85 and gains 0.05
    when no length-bound hint is found within two lines.
    """
    header_file = str(relpath).lower().endswith((".h", ".hpp"))
    type_keyword = re.compile(r"\b(static|inline|const|volatile|unsigned|signed|long|short|int|char|void|size_t|ssize_t)\b")
    found: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        # Skip preprocessor and declaration lines to avoid flagging
        # prototypes and macros.
        if text.lstrip().startswith("#"):
            continue
        if re.search(r"\b(typedef|extern)\b", text):
            continue

        hit = RE_UNSAFE_API.search(text)
        if hit is None:
            continue

        # In headers, "<type ...> name(...);" is a prototype, not a call.
        looks_like_prototype = (
            header_file
            and text.strip().endswith(");")
            and type_keyword.search(text[: hit.start()]) is not None
        )
        if looks_like_prototype:
            continue

        conf = 0.85
        if not _has_len_bound_around(lines, line_no, radius=2):
            conf += 0.05

        found.append(
            Issue(
                language="c/cpp",
                category="unsafe_api",
                pattern=hit.group(1),
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="使用不安全/高风险字符串API,可能导致缓冲区溢出或格式化风险。",
                suggestion="替换为带边界的安全API(如 snprintf/strlcpy 等)或加入显式长度检查。",
                confidence=min(conf, 0.95),
                severity=_severity_from_confidence(conf, "unsafe_api"),
            )
        )
    return found
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def _rule_boundary_funcs(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report length-taking copy calls matched by ``RE_BOUNDARY_FUNCS``.

    At most one issue is produced per line. Preprocessor lines and lines
    containing ``typedef`` / ``extern`` are skipped. A ``memcpy`` / ``memmove``
    whose argument text contains ``sizeof`` but neither ``sizeof(*ptr)`` nor
    ``strlen(`` is treated as the safer idiom and not reported.

    Confidence starts at 0.65, gains 0.15 when the line uses ``strlen(`` or
    ``sizeof(*ptr)``, and a further 0.1 when no length-bound hint is found
    within two lines.
    """
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        # Skip preprocessor and declaration lines (typedef/extern prototypes).
        if text.lstrip().startswith("#") or re.search(r"\b(typedef|extern)\b", text):
            continue

        hit = RE_BOUNDARY_FUNCS.search(text)
        if not hit:
            continue
        api = hit.group(1)

        # Heuristic argument text: from the call's "(" to the last ")" on the
        # line. The match always ends on the opening parenthesis.
        open_paren = hit.end() - 1
        close_paren = text.rfind(")")
        call_args = text[open_paren + 1 : close_paren] if close_paren > open_paren else ""

        # sizeof-based length without strlen / sizeof(*ptr): usually the safe
        # form, so do not report it.
        if (
            api.lower() in ("memcpy", "memmove")
            and call_args
            and "sizeof" in call_args
            and not RE_SIZEOF_PTR.search(call_args)
            and not RE_STRLEN_IN_SIZE.search(call_args)
        ):
            continue

        conf = 0.65
        # strlen / sizeof(*ptr) anywhere on the line: unreliable length source.
        if RE_STRLEN_IN_SIZE.search(text) or RE_SIZEOF_PTR.search(text):
            conf += 0.15
        # No bound check nearby raises the score further.
        if not _has_len_bound_around(lines, line_no, radius=2):
            conf += 0.1

        findings.append(
            Issue(
                language="c/cpp",
                category="buffer_overflow",
                pattern=api,
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="缓冲区操作涉及长度/边界,需确认长度来源是否可靠,避免越界。",
                suggestion="核对目标缓冲区大小与拷贝长度;对外部输入进行校验;优先使用安全封装。",
                confidence=min(conf, 0.95),
                severity=_severity_from_confidence(conf, "buffer_overflow"),
            )
        )
    return findings
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def _rule_realloc_assign_back(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``p = realloc(p, ...)`` where the result overwrites its own input.

    Lines are matched with ``RE_REALLOC_ASSIGN_BACK``. Confidence starts at
    0.8 and gains 0.1 when no null check on the pointer is found within three
    lines.
    """

    def build(line_no: int, text: str, var: str) -> Issue:
        conf = 0.8
        # No error handling / NULL check nearby makes the finding more likely.
        if not _has_null_check_around(var, lines, line_no, radius=3):
            conf += 0.1
        return Issue(
            language="c/cpp",
            category="memory_mgmt",
            pattern="realloc_overwrite",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description=f"realloc 直接覆盖原指针 {var},若失败将导致原内存泄漏。",
            suggestion="使用临时指针接收 realloc 返回值,判空成功后再赋值回原指针。",
            confidence=min(conf, 0.95),
            severity=_severity_from_confidence(conf, "memory_mgmt"),
        )

    found: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        hit = RE_REALLOC_ASSIGN_BACK.search(text)
        if hit:
            found.append(build(line_no, text, hit.group(1)))
    return found
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _rule_malloc_no_null_check(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``var = malloc(...)`` / ``calloc(...)`` / ``new`` assignments.

    Each of the three allocation patterns is tried on every line, and every
    match yields an issue. Confidence starts at 0.55. When no null check on
    the variable is found within four lines, it gains 0.25 if the variable is
    visibly used (``var->``, ``var[``, ``var(``) in the following six lines,
    or 0.1 otherwise. The reported confidence is capped at 0.9.
    """
    alloc_patterns = (RE_MALLOC_ASSIGN, RE_CALLOC_ASSIGN, RE_NEW_ASSIGN)
    found: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        for pattern in alloc_patterns:
            hit = pattern.search(text)
            if not hit:
                continue
            var = hit.group(1)

            conf = 0.55
            checked = _has_null_check_around(var, lines, line_no, radius=4)

            # Rough use detection on the following six lines (the allocation
            # line itself is excluded).
            use_re = re.compile(rf"\b{re.escape(var)}\b(\s*(->|\[|\())")
            used_later = any(
                use_re.search(later)
                for later_no, later in _window(lines, line_no, before=0, after=6)
                if later_no != line_no
            )

            if not checked:
                conf += 0.25 if used_later else 0.1

            found.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="alloc_no_null_check",
                    file=relpath,
                    line=line_no,
                    evidence=_strip_line(text),
                    description=f"内存/对象分配给 {var} 后可能未检查是否成功(NULL 检查缺失)。",
                    suggestion="在使用前检查分配结果是否为 NULL,并在错误路径上释放已获取的资源。",
                    confidence=min(conf, 0.9),
                    severity=_severity_from_confidence(conf, "memory_mgmt"),
                )
            )
    return found
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _rule_uaf_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Heuristic use-after-free (UAF) detection.

    For every ``free(var);`` call the next 50 lines are scanned:

    - if ``var`` is set to ``NULL``/``0`` on the free line or within the next
      3 lines, the free is ignored;
    - otherwise the first line that either reassigns ``var`` or dereferences it
      (``var->``, ``*var``, ``var[``) decides: a reassignment ends the scan
      silently, a dereference produces an issue.

    Two forms are deliberately NOT treated as a reassignment of the pointer:

    - ``var == ...`` is a comparison, not an assignment;
    - ``*var = ...`` at the start of a statement is a write *through* the
      freed pointer, i.e. exactly the use this rule looks for.

    The result is still a heuristic and needs manual confirmation.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per suspicious ``free`` call, anchored on the free line.
    """
    issues: List[Issue] = []

    # Collect every free(var) location as (variable, 1-based line).
    free_re = re.compile(r"free\s*\(\s*([A-Za-z_]\w*)\s*\)\s*;")
    free_calls: List[Tuple[str, int]] = [
        (m.group(1), idx)
        for idx, s in enumerate(lines, start=1)
        for m in free_re.finditer(s)
    ]

    total = len(lines)
    for var, free_ln in free_calls:
        name = re.escape(var)
        null_reset = re.compile(rf"\b{name}\s*=\s*(NULL|0)\s*;")
        # "*var = ..." at statement start: a store through the pointer.
        store_through = re.compile(rf"(?:^|[;{{}}])\s*\*\s*{name}\s*=(?!=)")
        # "(?!=)" keeps "var == x" from being mistaken for an assignment.
        assign_pat = re.compile(rf"\b{name}\s*=(?!=)")
        deref_arrow = re.compile(rf"\b{name}\s*->")
        deref_star = re.compile(rf"(?<!\w)\*\s*{name}\b")
        deref_index = re.compile(rf"\b{name}\s*\[")

        # Pointer nulled on the same or one of the next 3 lines: nothing to report.
        early_null = False
        for j in range(free_ln, min(total, free_ln + 3) + 1):
            sj = _safe_line(lines, j)
            if null_reset.search(sj) and not store_through.search(sj):
                early_null = True
                break
        if early_null:
            continue

        uaf_evidence_line: Optional[int] = None
        for j in range(free_ln + 1, min(total, free_ln + 50) + 1):
            sj = _safe_line(lines, j)
            if store_through.search(sj):
                uaf_evidence_line = j
                break
            # A reassignment (including NULL) restarts the pointer's lifetime.
            if assign_pat.search(sj):
                break
            if deref_arrow.search(sj) or deref_star.search(sj) or deref_index.search(sj):
                uaf_evidence_line = j
                break

        if uaf_evidence_line is None:
            continue

        # The free line stays the reported location, as in earlier output.
        issues.append(
            Issue(
                language="c/cpp",
                category="memory_mgmt",
                pattern="use_after_free_suspect",
                file=relpath,
                line=free_ln,
                evidence=_strip_line(_safe_line(lines, free_ln)),
                description=f"变量 {var} 在 free 后的邻近窗口内出现了解引用使用(UAF 线索),且未检测到重新赋值/置空。",
                suggestion="free 后应将指针置为 NULL,并避免在重新赋值前进行任何解引用;建议引入生命周期管理与动态/静态检测。",
                confidence=0.65,
                severity="high",
            )
        )
    return issues
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def _rule_unchecked_io(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report I/O or system calls whose return value appears to be ignored.

    A line matching ``RE_IO_API`` is reported unless one of these holds:

    - it is a preprocessor line or contains ``typedef``/``extern``;
    - the line or the 2 lines after it contain ``if``/``while``/``for (`` or a
      comparison operator;
    - the call result is assigned to a variable (nearest ``RE_GENERIC_ASSIGN``
      match left of the call) and that variable is tested in a condition or
      comparison within the next 5 lines.

    The ``>`` of the member-access operator ``->`` is not counted as a
    comparison; otherwise every call taking ``obj->field`` as an argument
    would be considered checked.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per apparently unchecked call, in file order.
    """
    issues: List[Issue] = []
    cond_re = re.compile(r"\b(if|while|for)\s*\(")
    # "(?<!-)>" excludes the arrow operator "->".
    cmp_re = re.compile(r"(>=|<=|==|!=|<|(?<!-)>)")
    total = len(lines)

    for idx, s in enumerate(lines, start=1):
        # Skip preprocessor directives and declarations.
        if s.lstrip().startswith("#") or re.search(r"\b(typedef|extern)\b", s):
            continue
        m = RE_IO_API.search(s)
        if not m:
            continue

        # A condition/comparison on this line or the next 2 counts as a check.
        nearby = " ".join(_safe_line(lines, i) for i in range(idx, min(idx + 2, total) + 1))
        if cond_re.search(nearby) or cmp_re.search(nearby):
            continue

        # Result stored in a variable? Use the assignment closest to the call.
        assigns = list(RE_GENERIC_ASSIGN.finditer(s[: m.start()]))
        assigned_var: Optional[str] = assigns[-1].group(1) if assigns else None

        if assigned_var:
            name = re.escape(assigned_var)
            var_pat_cond = re.compile(rf"\b(if|while|for)\s*\([^)]*\b{name}\b[^)]*\)")
            var_pat_cmp = re.compile(rf"\b{name}\b\s*(>=|<=|==|!=|<|>)")
            following = (_safe_line(lines, j) for j in range(idx + 1, min(total, idx + 5) + 1))
            if any(var_pat_cond.search(sj) or var_pat_cmp.search(sj) for sj in following):
                continue

        # Still no visible check: report.
        conf = 0.65
        issues.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="io_call",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description="I/O/系统调用可能未检查返回值,存在错误处理缺失风险。",
                suggestion="检查返回值/errno;在错误路径上释放资源(句柄/内存/锁)。",
                confidence=min(conf, 0.75),
                severity=_severity_from_confidence(conf, "error_handling"),
            )
        )
    return issues
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def _rule_strncpy_no_nullterm(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``strncpy``/``strncat`` calls that may leave a string unterminated.

    Every line matching ``RE_STRNCPY`` or ``RE_STRNCAT`` is reported. The
    confidence is raised when the surrounding window (1 line before, 2 after)
    shows neither a manual terminator nor a ``len - 1`` / ``sizeof(x) - 1``
    style length.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per matching line, in file order.
    """
    found: List[Issue] = []
    for lineno, text in enumerate(lines, start=1):
        if not (RE_STRNCPY.search(text) or RE_STRNCAT.search(text)):
            continue
        conf = 0.55
        neighbourhood = " ".join(
            piece for _, piece in _window(lines, lineno, before=1, after=2)
        )
        terminated = re.search(
            r"\\0|'\0'|\"\\0\"|len\s*-\s*1|sizeof\s*\(\s*\w+\s*\)\s*-\s*1",
            neighbourhood,
        )
        if not terminated:
            conf += 0.15
        found.append(
            Issue(
                language="c/cpp",
                category="buffer_overflow",
                pattern="strncpy/strncat",
                file=relpath,
                line=lineno,
                evidence=_strip_line(text),
                description="使用 strncpy/strncat 可能未自动添加 NUL 终止,导致潜在字符串未终止风险。",
                suggestion="确保目标缓冲区以 '\\0' 终止(例如手动结尾或采用更安全 API)。",
                confidence=min(conf, 0.75),
                severity=_severity_from_confidence(conf, "buffer_overflow"),
            )
        )
    return found
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
# ---------------------------
|
|
744
|
+
# 对外主入口
|
|
745
|
+
# ---------------------------
|
|
746
|
+
|
|
747
|
+
# ---------------------------
|
|
748
|
+
# 额外规则(新增)
|
|
749
|
+
# ---------------------------
|
|
750
|
+
|
|
751
|
+
def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report printf-family calls whose format argument is not a string literal.

    The format argument position depends on the function:
    ``printf`` 1st; ``sprintf``/``vsprintf`` 2nd; ``snprintf``/``vsnprintf``
    3rd; ``fprintf`` 2nd. A call is considered safe when the format argument

    - starts with a double quote,
    - is a known localisation wrapper applied to a literal
      (``_("...")``, ``gettext("...")``, ``tr("...")``, ...), or
    - is an identifier that was assigned a string literal within the previous
      5 lines.

    When the argument cannot be located on the line the call is reported
    conservatively. Parsing is single-line and heuristic.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        At most one ``Issue`` per line, in file order.
    """
    SAFE_WRAPPERS = ("_", "gettext", "dgettext", "ngettext", "tr", "QT_TR_NOOP", "QT_TRANSLATE_NOOP")
    # 1-based position of the format string for each printf-like function.
    FORMAT_POSITION = {
        "printf": 1,
        "sprintf": 2,
        "vsprintf": 2,
        "snprintf": 3,
        "vsnprintf": 3,
    }
    issues: List[Issue] = []

    def _skip_blank(s: str, j: int) -> int:
        """Return the index of the first non-whitespace char at or after ``j``."""
        while j < len(s) and s[j].isspace():
            j += 1
        return j

    def _ident_end(s: str, j: int) -> int:
        """Return the index just past the run of identifier chars starting at ``j``."""
        while j < len(s) and (s[j].isalnum() or s[j] == "_"):
            j += 1
        return j

    def _starts_literal(s: str, j: int) -> bool:
        """True when the text at ``j`` (after whitespace) opens a string literal."""
        j = _skip_blank(s, j)
        return j < len(s) and s[j] == '"'

    def _is_wrapped_literal(s: str, j: int) -> bool:
        """True for ``WRAPPER("...`` where WRAPPER is one of SAFE_WRAPPERS."""
        name_end = _ident_end(s, j)
        if s[j:name_end] not in SAFE_WRAPPERS:
            return False
        paren = _skip_blank(s, name_end)
        if paren >= len(s) or s[paren] != "(":
            return False
        return _starts_literal(s, paren + 1)

    def _leading_ident(s: str, j: int) -> Optional[str]:
        """Return the identifier starting exactly at ``j``, or None."""
        if j < len(s) and (s[j].isalpha() or s[j] == "_"):
            return s[j:_ident_end(s, j)]
        return None

    def _var_assigned_literal(var: str, lines: Sequence[str], upto_idx: int, lookback: int = 5) -> bool:
        """True if ``var = "...`` appears in the ``lookback`` lines before ``upto_idx``."""
        assign_re = re.compile(rf"\b{re.escape(var)}\s*=\s*")
        for j in range(max(1, upto_idx - lookback), upto_idx):
            candidate = _safe_line(lines, j)
            hit = assign_re.search(candidate)
            if hit and _starts_literal(candidate, hit.end()):
                return True
        return False

    def _nth_arg_start(s: str, open_paren_idx: int, n: int) -> Optional[int]:
        """Return the start index of the n-th argument, or None.

        Only the current line is inspected; parentheses are balanced and
        top-level commas counted (heuristic).
        """
        depth = 0
        starts: List[int] = []
        pending = None  # start of the argument currently being read
        for i in range(open_paren_idx + 1, len(s)):
            ch = s[i]
            if ch == "(":
                depth += 1
            elif ch == ")":
                if depth == 0:
                    # Closing paren of the call itself: argument list ends here.
                    if pending is not None:
                        starts.append(pending)
                        pending = None
                    break
                depth -= 1
            elif ch == "," and depth == 0:
                # Empty argument: remember the position right after the comma.
                starts.append(i + 1 if pending is None else pending)
                pending = None
            elif not pending and not ch.isspace():
                pending = i
        if pending is not None:
            # Line ended before the call was closed.
            starts.append(pending)
        cleaned = [_skip_blank(s, pos) for pos in starts]
        if 1 <= n <= len(cleaned):
            return cleaned[n - 1]
        return None

    def _format_is_suspicious(s: str, open_idx: int, position: int, idx: int) -> bool:
        """Decide whether the format argument at ``position`` is non-literal."""
        j = _nth_arg_start(s, open_idx, position)
        if j is None:
            # Argument not found on this line: report conservatively.
            return True
        if _starts_literal(s, j):
            return False
        if not (s[j].isalpha() or s[j] == "_"):
            return True
        if _is_wrapped_literal(s, j):
            return False
        ident = _leading_ident(s, j)
        return not (ident and _var_assigned_literal(ident, lines, idx, lookback=5))

    for idx, s in enumerate(lines, start=1):
        flagged = False

        # printf / sprintf / snprintf / vsprintf / vsnprintf
        m1 = RE_PRINTF_LIKE.search(s)
        if m1:
            try:
                position = FORMAT_POSITION.get(m1.group(1).lower(), 1)
                flagged = _format_is_suspicious(s, s.index("(", m1.start()), position, idx)
            except Exception:
                pass

        # fprintf: the format string is the second argument.
        m2 = RE_FPRINTF.search(s)
        if not flagged and m2:
            try:
                flagged = _format_is_suspicious(s, s.index("(", m2.start()), 2, idx)
            except Exception:
                pass

        if not flagged:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="unsafe_usage",
                pattern="format_string",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description="格式化字符串参数不是字面量,可能导致格式化字符串漏洞。",
                suggestion="使用常量格式串并对外部输入进行参数化处理;避免将未验证的输入作为格式串。",
                confidence=0.8,
                severity="high",
            )
        )
    return issues
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
def _rule_insecure_tmpfile(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report uses of insecure temporary-file APIs (tmpnam/tempnam/mktemp).

    Every line matching ``RE_INSECURE_TMP`` yields one issue.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per matching line, in file order.
    """
    return [
        Issue(
            language="c/cpp",
            category="unsafe_usage",
            pattern="insecure_tmpfile",
            file=relpath,
            line=lineno,
            evidence=_strip_line(text),
            description="使用不安全的临时文件API(tmpnam/tempnam/mktemp)可能导致竞态条件与劫持风险。",
            suggestion="使用 mkstemp/mkdtemp 或安全封装,并设置合适的权限。",
            confidence=0.85,
            severity="high",
        )
        for lineno, text in enumerate(lines, start=1)
        if RE_INSECURE_TMP.search(text)
    ]
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
def _rule_command_execution(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report command-execution calls whose first argument is not a literal.

    Lines matching ``RE_SYSTEM_LIKE`` are checked first, then (if not already
    flagged) lines matching ``RE_EXEC_LIKE``. A call is considered safe when
    its first argument starts with a double quote, or is an identifier that
    was assigned a string literal within the previous 5 lines.

    If inspecting a system-like call raises, the line is left unflagged; if
    inspecting an exec-like call raises, the line is flagged.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        At most one ``Issue`` per line, in file order.
    """
    issues: List[Issue] = []

    def _after_blank(s: str, j: int) -> int:
        """Return the index of the first non-whitespace char at or after ``j``."""
        while j < len(s) and s[j].isspace():
            j += 1
        return j

    def _first_arg_is_literal(s: str, paren_idx: int) -> bool:
        """True when the first argument after ``paren_idx`` opens a string literal."""
        j = _after_blank(s, paren_idx + 1)
        return j < len(s) and s[j] == '"'

    def _first_arg_identifier(s: str, paren_idx: int) -> Optional[str]:
        """Return the identifier the first argument starts with, or None."""
        j = _after_blank(s, paren_idx + 1)
        if j >= len(s) or not (s[j].isalpha() or s[j] == "_"):
            return None
        k = j
        while k < len(s) and (s[k].isalnum() or s[k] == "_"):
            k += 1
        return s[j:k]

    def _var_assigned_literal(var: str, lines: Sequence[str], upto_idx: int, lookback: int = 5) -> bool:
        """True if ``var = "...`` appears in the ``lookback`` lines before ``upto_idx``."""
        assign_re = re.compile(rf"\b{re.escape(var)}\s*=\s*")
        for j in range(max(1, upto_idx - lookback), upto_idx):
            candidate = _safe_line(lines, j)
            hit = assign_re.search(candidate)
            if not hit:
                continue
            # The right-hand side must open with a quote.
            k = _after_blank(candidate, hit.end())
            if k < len(candidate) and candidate[k] == '"':
                return True
        return False

    def _call_is_suspicious(s: str, match, idx: int) -> bool:
        """Decide whether the call found by ``match`` has a non-literal first argument."""
        paren_idx = s.index("(", match.start())
        if _first_arg_is_literal(s, paren_idx):
            return False
        ident = _first_arg_identifier(s, paren_idx)
        return not (ident and _var_assigned_literal(ident, lines, idx, lookback=5))

    for idx, s in enumerate(lines, start=1):
        flagged = False

        m_sys = RE_SYSTEM_LIKE.search(s)
        if m_sys:
            try:
                flagged = _call_is_suspicious(s, m_sys, idx)
            except Exception:
                pass

        if not flagged:
            m_exec = RE_EXEC_LIKE.search(s)
            if m_exec:
                try:
                    flagged = _call_is_suspicious(s, m_exec, idx)
                except Exception:
                    flagged = True

        if not flagged:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="unsafe_usage",
                pattern="command_exec",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description="外部命令执行可能使用了非字面量参数,存在命令注入风险。",
                suggestion="避免拼接命令,使用参数化接口或受控白名单;严格校验/转义外部输入。",
                confidence=0.7,
                severity="high",
            )
        )
    return issues
|
|
1051
|
+
|
|
1052
|
+
|
|
1053
|
+
def _rule_scanf_no_width(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report scanf-family calls that use ``%s`` without a maximum field width.

    Only calls matched by ``RE_SCANF_CALL`` are inspected; group 1 of the
    match is taken as the format text.

    A format is unsafe when it contains a bare ``%s`` conversion. The
    following forms are not bare ``%s`` and therefore never trigger on their
    own:

    - ``%<digits>s`` (width given),
    - ``%*s`` (input discarded, nothing is written),
    - GNU ``%ms`` / ``%m[...]`` (buffer allocated by the library).

    Each conversion is judged individually: ``"%10s %s"`` or ``"%*s %s"`` is
    still reported because of the trailing unbounded ``%s``. A literal
    ``%%`` is removed before the test so that ``"%%s"`` is not reported.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per unsafe call line, in file order.
    """
    issues: List[Issue] = []
    for idx, s in enumerate(lines, start=1):
        m = RE_SCANF_CALL.search(s)
        if not m:
            continue
        # "%%" is an escaped percent sign, not the start of a conversion.
        conversions = m.group(1).replace("%%", "")
        if "%s" not in conversions:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="buffer_overflow",
                pattern="scanf_%s_no_width",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description="scanf/sscanf/fscanf 使用 %s 但未限制最大宽度,存在缓冲区溢出风险。",
                suggestion="为 %s 指定最大宽度(如 \"%255s\"),或使用更安全的读取方式;若使用 GNU 扩展 %ms/%m[...] 请确保对返回内存进行释放。",
                confidence=0.75,
                severity="high",
            )
        )
    return issues
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
def _rule_alloc_size_overflow(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Report ``malloc`` calls whose size multiplies values without ``sizeof``.

    Only the first ``malloc(`` on each line is inspected (case-insensitive).
    The argument text is taken up to the parenthesis that closes the call,
    so nested calls and casts such as ``malloc(strlen(s) * 2)`` or
    ``malloc((size_t)n * 4)`` are seen in full. Calls that are not closed on
    the same line are skipped. This is a heuristic and needs manual review.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per suspicious line, in file order.
    """
    issues: List[Issue] = []
    malloc_re = re.compile(r"\bmalloc\s*\(", re.IGNORECASE)
    sizeof_re = re.compile(r"\bsizeof\s*\(")

    for idx, s in enumerate(lines, start=1):
        m = malloc_re.search(s)
        if not m:
            continue

        # The pattern ends with "(", so the call's opening paren is m.end() - 1.
        open_idx = m.end() - 1
        close_idx = -1
        depth = 0
        for pos in range(open_idx, len(s)):
            ch = s[pos]
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
                if depth == 0:
                    close_idx = pos
                    break
        if close_idx == -1:
            # Call continues on a following line; too little context to judge.
            continue

        args = s[open_idx + 1 : close_idx]
        if "*" not in args or sizeof_re.search(args):
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="memory_mgmt",
                pattern="alloc_size_overflow",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description="malloc 大小计算包含乘法且未显式使用 sizeof,存在整数溢出或尺寸计算错误的风险。",
                suggestion="使用 sizeof 计算元素大小并检查乘法是否可能溢出;引入范围/上界校验。",
                confidence=0.6,
                severity="medium",
            )
        )
    return issues
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
# ---------------------------
|
|
1131
|
+
# 空指针/野指针/死锁 等新增规则
|
|
1132
|
+
# ---------------------------
|
|
1133
|
+
|
|
1134
|
+
def _rule_possible_null_deref(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Heuristic detection of possible NULL-pointer dereferences.

    A variable is reported when it is dereferenced on a line (``p->...`` or
    ``*p``) and ``_has_null_check_around`` finds no NULL check within 3 lines.

    To limit false positives for ``*p``:

    - lines containing a type keyword (likely declarations) are skipped;
    - the ``*`` must sit in a typical dereference position: at the start of
      the line or after one of ``( * , = : { ; [ ! &``. This filters out
      multiplications such as ``a * p``.

    ``this`` is never reported. Variables on one line are reported in order
    of first appearance, so the output is stable between runs.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per (line, variable) pair lacking a nearby NULL check.
    """
    issues: List[Issue] = []
    re_arrow = re.compile(r"\b([A-Za-z_]\w*)\s*->")
    re_star = re.compile(r"(?<!\w)\*\s*([A-Za-z_]\w*)\b")
    type_kw = re.compile(r"\b(typedef|struct|union|enum|class|char|int|long|short|void|size_t|ssize_t|FILE)\b")

    def _is_deref_context(line: str, star_pos: int) -> bool:
        """True when the ``*`` at ``star_pos`` is positioned like a dereference."""
        k = star_pos - 1
        while k >= 0 and line[k].isspace():
            k -= 1
        if k < 0:
            return True
        # Characters after which a "*" is read as a dereference.
        return line[k] in "(*,=:{;[!&"

    for idx, s in enumerate(lines, start=1):
        # "->" is treated as a dereference unconditionally.
        vars_hit: List[str] = [m.group(1) for m in re_arrow.finditer(s)]
        # "*p": skip declaration-like lines and multiplication contexts.
        if "*" in s and not type_kw.search(s):
            vars_hit.extend(
                m.group(1)
                for m in re_star.finditer(s)
                if _is_deref_context(s, m.start(0))
            )
        # dict.fromkeys de-duplicates while keeping first-seen order
        # (a plain set would give a hash-dependent order).
        for v in dict.fromkeys(vars_hit):
            if v == "this":  # this-> in C++ member functions is not reported
                continue
            if _has_null_check_around(v, lines, idx, radius=3):
                continue
            issues.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="possible_null_deref",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"可能对指针 {v} 进行了解引用,但附近未见 NULL 检查,存在空指针解引用风险。",
                    suggestion="在使用指针前执行 NULL 判定;确保所有返回/赋值路径均进行了合法性检查。",
                    confidence=0.6,
                    severity="high",
                )
            )
    return issues
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
def _rule_uninitialized_ptr_use(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Detect simple cases of dereferencing an uninitialised (wild) pointer.

    Candidate declarations are lines that contain ``;`` and ``*`` and a type
    keyword, but neither ``=`` nor ``(`` (which excludes initialised pointers
    and function pointers). For each declared name the next 20 lines are
    scanned; the first line that assigns the pointer or dereferences it
    (``p->`` or ``*p``) decides the outcome.

    Two forms are deliberately NOT treated as initialisation:

    - ``p == ...`` is a comparison, not an assignment;
    - ``*p = ...`` at the start of a statement stores through the
      uninitialised pointer and is reported as a dereference.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        One ``Issue`` per candidate that is dereferenced before assignment,
        anchored on the dereferencing line.
    """
    issues: List[Issue] = []
    type_prefix = re.compile(r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double)\b")
    declared_name = re.compile(r"\*\s*([A-Za-z_]\w*)\b")

    # Collect candidate uninitialised pointer declarations as (name, line).
    candidates: List[Tuple[str, int]] = []
    for idx, s in enumerate(lines, start=1):
        if ";" not in s or "(" in s or "=" in s:
            continue
        if "*" not in s or not type_prefix.search(s):
            continue
        # Pick up every "*name" on the line, e.g. "char *p, *q;".
        candidates.extend((m.group(1), idx) for m in declared_name.finditer(s))

    total = len(lines)
    for v, decl_line in candidates:
        name = re.escape(v)
        # "*p = ..." at statement start: a store through the pointer.
        store_through = re.compile(rf"(?:^|[;{{}}])\s*\*\s*{name}\s*=(?!=)")
        # "(?!=)" keeps "p == x" from counting as an assignment.
        assign_pat = re.compile(rf"\b{name}\s*=(?!=)")
        deref_arrow = re.compile(rf"\b{name}\s*->")
        deref_star = re.compile(rf"(?<!\w)\*\s*{name}\b")

        deref_line: Optional[int] = None
        # Look ahead 20 lines from the declaration.
        for j in range(decl_line + 1, min(total, decl_line + 20) + 1):
            sj = _safe_line(lines, j)
            if store_through.search(sj):
                deref_line = j
                break
            # Assignment / initialisation: p = ..., p = &x, p = malloc(...)
            if assign_pat.search(sj):
                break
            if deref_arrow.search(sj) or deref_star.search(sj):
                deref_line = j
                break

        if deref_line is None:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="memory_mgmt",
                pattern="wild_pointer_deref",
                file=relpath,
                line=deref_line,
                evidence=_strip_line(_safe_line(lines, deref_line)),
                description=f"指针 {v} 声明后未见初始化即被解引用,可能为野指针使用。",
                suggestion="在声明后立即将指针初始化为 NULL,并在使用前进行显式赋值与有效性校验。",
                confidence=0.65,
                severity="high",
            )
        )
    return issues
|
|
1246
|
+
|
|
1247
|
+
|
|
1248
|
+
def _rule_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Detect common deadlock risks around mutex lock/unlock calls.

    Three heuristics are applied, using ``RE_PTHREAD_LOCK`` and
    ``RE_PTHREAD_UNLOCK`` (group 1 is taken as the mutex expression):

    - double lock: a mutex is locked again while still on the held-locks stack;
    - lock-order inversion: both orders (A -> B) and (B -> A) occur in the
      file; each inverted pair is reported once, at the later of the two
      locations;
    - missing unlock: no matching unlock within 50 lines after a lock.

    The held-locks stack is cleared on a line that starts with ``}`` in the
    first column, used as a rough marker for the end of a function, so locks
    left held by one function do not leak into the next. All three checks
    may produce false positives.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        Double-lock issues first (file order), then inversions, then
        missing-unlock suspects.
    """
    issues: List[Issue] = []
    lock_stack: list[str] = []
    # First line on which each (held, newly acquired) ordering was seen.
    order_pairs: dict[tuple[str, str], int] = {}

    # Pass 1: lock ordering and double locking.
    for idx, s in enumerate(lines, start=1):
        m_lock = RE_PTHREAD_LOCK.search(s)
        m_unlock = RE_PTHREAD_UNLOCK.search(s)
        if m_lock:
            mtx = m_lock.group(1)
            if mtx in lock_stack:
                issues.append(
                    Issue(
                        language="c/cpp",
                        category="error_handling",
                        pattern="double_lock",
                        file=relpath,
                        line=idx,
                        evidence=_strip_line(s),
                        description=f"互斥量 {mtx} 在未解锁的情况下被再次加锁,存在死锁风险。",
                        suggestion="避免对同一互斥量重复加锁;检查代码路径确保加锁/解锁严格匹配。",
                        confidence=0.8,
                        severity="high",
                    )
                )
            # Record the order relative to the most recently held lock.
            if lock_stack and lock_stack[-1] != mtx:
                order_pairs.setdefault((lock_stack[-1], mtx), idx)
            lock_stack.append(mtx)
        elif m_unlock:
            mtx = m_unlock.group(1)
            # Drop the most recent acquisition of this mutex (approximation).
            for k in range(len(lock_stack) - 1, -1, -1):
                if lock_stack[k] == mtx:
                    del lock_stack[k]
                    break
        # Rough function-end reset: a closing brace in column 0.
        if s.startswith("}"):
            lock_stack.clear()

    # Lock-order inversion: report each inverted pair once, at the later site.
    for (a, b), ln in order_pairs.items():
        reverse_ln = order_pairs.get((b, a))
        if reverse_ln is None or reverse_ln <= ln:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="lock_order_inversion",
                file=relpath,
                line=reverse_ln,
                evidence=_strip_line(_safe_line(lines, reverse_ln)),
                description=f"检测到互斥量加锁顺序反转:({a} -> {b}) 与 ({b} -> {a}),存在死锁风险。",
                suggestion="统一多锁的获取顺序,制定全局锁等级或严格的加锁顺序规范。",
                confidence=0.7,
                severity="high",
            )
        )

    # Pass 2: possibly missing unlock within 50 lines after a lock.
    total = len(lines)
    for idx, s in enumerate(lines, start=1):
        m_lock = RE_PTHREAD_LOCK.search(s)
        if not m_lock:
            continue
        mtx = m_lock.group(1)
        unlocked = False
        for j in range(idx + 1, min(total, idx + 50) + 1):
            m_un = RE_PTHREAD_UNLOCK.search(_safe_line(lines, j))
            if m_un and m_un.group(1) == mtx:
                unlocked = True
                break
        if unlocked:
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="missing_unlock_suspect",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description=f"在加锁 {mtx} 之后的邻近窗口内未检测到匹配解锁,可能存在缺失解锁的风险。",
                suggestion="确保所有加锁路径都有配对的解锁;考虑使用 RAII/DEFER 风格避免遗漏。",
                confidence=0.55,
                severity="medium",
            )
        )
    return issues
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
# ---------------------------
|
|
1352
|
+
# 其他危险用法规则(新增一批低误报)
|
|
1353
|
+
# ---------------------------
|
|
1354
|
+
|
|
1355
|
+
def _rule_double_free_and_free_non_heap(lines: Sequence[str], relpath: str) -> List[Issue]:
    """Detect double frees and frees of obviously non-heap memory.

    - ``free_non_heap``: the argument of a free call starts with ``&`` or a
      string literal.
    - ``double_free``: the argument is a single identifier that was already
      freed earlier in the file, with no assignment to that identifier
      recorded on or after the line of the previous free.

    ``free(NULL)`` / ``free(0)`` / ``free((void *)0)`` are ignored. Free calls
    come from ``RE_FREE_CALL_ANY`` and assignments from ``RE_GENERIC_ASSIGN``
    (group 1 in both cases). Heuristic; complex cases need manual review.

    Args:
        lines: Source lines of the file being scanned (reported 1-based).
        relpath: Path recorded in every emitted issue.

    Returns:
        The collected issues, in file order.
    """
    found: List[Issue] = []
    freed_at: dict[str, int] = {}
    assigned_at: dict[str, int] = {}

    null_arg = re.compile(r"\(?\s*(NULL|0|\(void\s*\*\)\s*0)\s*\)?", re.IGNORECASE)
    address_of = re.compile(r"^\(?\s*&")
    bare_ident = re.compile(r"[A-Za-z_]\w*")

    for lineno, text in enumerate(lines, start=1):
        # Track the latest assignment per variable, so a re-assignment between
        # two frees can be recognised.
        for assign in RE_GENERIC_ASSIGN.finditer(text):
            assigned_at[assign.group(1)] = lineno

        for call in RE_FREE_CALL_ANY.finditer(text):
            arg = call.group(1).strip()

            if null_arg.fullmatch(arg):
                continue

            # Obviously not heap memory: address-of expression or string literal.
            if address_of.match(arg) or arg.lstrip().startswith('"'):
                found.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="free_non_heap",
                        file=relpath,
                        line=lineno,
                        evidence=_strip_line(text),
                        description="检测到对非堆内存的释放(如 &var 或字符串字面量),属于未定义行为。",
                        suggestion="仅释放由 malloc/calloc/realloc/new/new[] 获得的堆内存;避免对栈地址/字面量调用 free。",
                        confidence=0.85,
                        severity="high",
                    )
                )
                continue

            # Double free is only tracked for plain identifiers.
            if not bare_ident.fullmatch(arg):
                continue
            var = arg
            previous = freed_at.get(var)
            if previous is not None and assigned_at.get(var, -1) < previous:
                # Freed before and not reassigned since.
                found.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="double_free",
                        file=relpath,
                        line=lineno,
                        evidence=_strip_line(text),
                        description=f"指针 {var} 可能在未重新赋值/置空情况下被重复释放(double free)。",
                        suggestion="free 后将指针置 NULL;确保每块内存仅释放一次;理清所有权与释放路径。",
                        confidence=0.8,
                        severity="high",
                    )
                )
            freed_at[var] = lineno
    return found
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def _rule_atoi_family(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report every line on which RE_ATOI_FAMILY matches.

    Per the emitted issue text the rule targets atoi/atol/atoll/atof, which
    the description characterises as lacking error and range checking.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``atoi_family`` issue per matching line, in
        file order.
    """
    return [
        Issue(
            language="c/cpp",
            category="input_validation",
            pattern="atoi_family",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="使用 atoi/atol/atoll/atof 缺乏错误与范围检查,容易产生解析错误或未定义行为。",
            suggestion="使用 strtol/strtoul/strtod 等并检查 errno 和 endptr;进行范围与格式校验。",
            confidence=0.65,
            severity="medium",
        )
        for line_no, text in enumerate(lines, start=1)
        if RE_ATOI_FAMILY.search(text)
    ]
|
|
1447
|
+
|
|
1448
|
+
|
|
1449
|
+
def _rule_rand_insecure(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report lines matched by RE_RAND (rand/srand usage per the issue text).

    The base confidence is 0.55. It is raised by 0.2 when the lower-cased
    text of the ``_window(lines, idx, before=1, after=1)`` neighbourhood
    contains one of the security-related keywords below; that boost also
    moves the severity from "medium" to "high".

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One ``rand_insecure`` issue per matching line, in file order.
    """
    sensitive_words = ("token", "nonce", "secret", "password", "passwd", "key", "auth", "salt", "session", "otp")
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        if not RE_RAND.search(text):
            continue

        # Plain substring test on the joined neighbourhood, case-insensitive.
        context = " ".join(chunk for _, chunk in _window(lines, line_no, before=1, after=1)).lower()
        score = 0.55
        if any(word in context for word in sensitive_words):
            score += 0.2

        findings.append(
            Issue(
                language="c/cpp",
                category="crypto",
                pattern="rand_insecure",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="检测到 rand/srand,用于安全敏感场景可能不安全,易被预测。",
                suggestion="使用系统级 CSPRNG(如 getrandom/arc4random/openssl RAND_bytes),避免用于密钥/令牌生成。",
                confidence=min(score, 0.8),
                severity="medium" if score < 0.7 else "high",
            )
        )

    return findings
|
|
1476
|
+
|
|
1477
|
+
|
|
1478
|
+
def _rule_strtok_nonreentrant(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report every line on which RE_STRTOK matches.

    Per the emitted issue text the rule targets strtok, described there as
    non-reentrant and not thread-safe.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``strtok_nonreentrant`` issue per matching line.
    """
    findings: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if RE_STRTOK.search(text) is None:
            continue
        finding = Issue(
            language="c/cpp",
            category="thread_safety",
            pattern="strtok_nonreentrant",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="使用 strtok 非重入且线程不安全,可能导致竞态或数据覆盖。",
            suggestion="使用 strtok_r(POSIX)或可重入/线程安全的分割方案。",
            confidence=0.6,
            severity="medium",
        )
        findings.append(finding)
    return findings
|
|
1500
|
+
|
|
1501
|
+
|
|
1502
|
+
def _rule_open_permissive_perms(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report overly permissive file creation, using two independent checks per line.

    1. RE_OPEN_PERMISSIVE matches: a high-severity ``open_permissive_perms``
       issue is emitted, quoting capture group 1 as the permission mode.
    2. RE_FOPEN_MODE matches with a "w" in capture group 1: a medium-severity
       ``fopen_write_sensitive`` issue is emitted, but only when the
       lower-cased ``_window(..., before=1, after=1)`` text contains one of
       the sensitive keywords below.

    A single line can produce both issues.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        The issues found, in file order.
    """
    sensitive_markers = ("key", "secret", "token", "passwd", "password", "cred", "config", "cert", "private", "id_rsa")
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        open_hit = RE_OPEN_PERMISSIVE.search(text)
        if open_hit is not None:
            perm = open_hit.group(1)
            findings.append(
                Issue(
                    language="c/cpp",
                    category="insecure_permissions",
                    pattern="open_permissive_perms",
                    file=relpath,
                    line=line_no,
                    evidence=_strip_line(text),
                    description=f"open 使用 O_CREAT 且权限 {perm} 过宽,存在敏感信息泄露风险。",
                    suggestion="显式使用更严格的权限(如 0600/0640),或设置合适 umask 后再创建文件。",
                    confidence=0.8,
                    severity="high",
                )
            )

        # The fopen check runs whether or not the open() check fired.
        fopen_hit = RE_FOPEN_MODE.search(text)
        if fopen_hit is None or "w" not in fopen_hit.group(1):
            continue
        context = " ".join(chunk for _, chunk in _window(lines, line_no, before=1, after=1)).lower()
        if not any(marker in context for marker in sensitive_markers):
            continue
        findings.append(
            Issue(
                language="c/cpp",
                category="insecure_permissions",
                pattern="fopen_write_sensitive",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="fopen 以写入模式操作可能的敏感文件,需确认创建权限与 umask 设置是否足够严格。",
                suggestion="确认运行态 umask;必要时使用 open+fchmod/umask 控制权限,或以 0600 创建后再放宽。",
                confidence=0.55,
                severity="medium",
            )
        )

    return findings
|
|
1550
|
+
|
|
1551
|
+
|
|
1552
|
+
# ---------------------------
|
|
1553
|
+
# 更多危险用法规则(第二批)
|
|
1554
|
+
# ---------------------------
|
|
1555
|
+
|
|
1556
|
+
def _rule_alloca_unbounded(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report alloca calls whose size argument does not look like a constant.

    The size expression is capture group 1 of RE_ALLOCA. A line is skipped
    when that expression is an all-digit literal, contains ``sizeof``, or is
    an ALL_CAPS identifier (treated as a macro constant). Otherwise the
    confidence starts at 0.6 and gains 0.1 when the expression contains a
    token ending in len/size/count/n (case-insensitive), which also raises
    the severity from "medium" to "high".

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One ``alloca_unbounded`` issue per offending line, in file order.
    """
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        hit = RE_ALLOCA.search(text)
        if hit is None:
            continue

        size_expr = hit.group(1).strip()
        looks_constant = (
            re.fullmatch(r"\d+\s*", size_expr)
            or "sizeof" in size_expr
            or re.fullmatch(r"[A-Z_][A-Z0-9_]*", size_expr)
        )
        if looks_constant:
            continue

        score = 0.6
        if re.search(r"(len|size|count|n)\b", size_expr, re.IGNORECASE):
            score += 0.1

        findings.append(
            Issue(
                language="c/cpp",
                category="memory_mgmt",
                pattern="alloca_unbounded",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="alloca 使用的大小不是编译期常量,可能导致未受控的栈分配与崩溃风险。",
                suggestion="避免使用 alloca;改用堆分配并对大小做上界检查与错误处理。",
                confidence=min(score, 0.8),
                severity="medium" if score < 0.7 else "high",
            )
        )

    return findings
|
|
1591
|
+
|
|
1592
|
+
|
|
1593
|
+
def _rule_vla_usage(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report declarations that look like variable-length arrays.

    A line is considered only when it contains ``;``, contains no ``=``,
    mentions one of the type/qualifier keywords below, and matches
    RE_VLA_DECL. Capture group 1 of that match is taken as the length
    expression; all-digit literals and ALL_CAPS identifiers (treated as
    macro constants) are not reported.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``vla_usage`` issue per offending line.
    """
    declaration_keyword = re.compile(r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double|size_t|ssize_t)\b")

    def _is_compile_time(expr: str) -> bool:
        # Pure digits or an ALL_CAPS name are assumed to be fixed sizes.
        return bool(re.fullmatch(r"\d+\s*", expr) or re.fullmatch(r"[A-Z_][A-Z0-9_]*", expr))

    findings: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if not (";" in text and "=" not in text):
            continue
        if declaration_keyword.search(text) is None:
            continue
        decl = RE_VLA_DECL.search(text)
        if decl is None or _is_compile_time(decl.group(1).strip()):
            continue
        findings.append(
            Issue(
                language="c/cpp",
                category="memory_mgmt",
                pattern="vla_usage",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="检测到可变长度数组(VLA),在栈上进行不定大小分配,可能导致栈溢出/不可控内存使用。",
                suggestion="避免 VLA;改用堆分配并进行上界校验,或使用固定上界的静态分配。",
                confidence=0.6,
                severity="medium",
            )
        )
    return findings
|
|
1629
|
+
|
|
1630
|
+
|
|
1631
|
+
def _rule_pthread_returns_unchecked(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report pthread calls whose return value does not appear to be checked.

    For every line matched by RE_PTHREAD_RET, the line itself and up to two
    following lines (fetched through ``_safe_line``) are joined and searched
    for evidence of a check: an ``if (`` or a comparison operator. When no
    such evidence is found an issue is emitted.

    A bare ``<`` or ``>`` is accepted as a comparison only when it is not
    part of ``->``, ``<<`` or ``>>``. Without that restriction a call such
    as ``pthread_mutex_lock(&obj->mtx);`` would always be treated as
    "checked" because of the ``>`` in the member-access arrow.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``pthread_ret_unchecked`` issue per unchecked
        call line, in file order.
    """
    # Compiled once per call rather than once per candidate line.
    check_evidence = re.compile(r"\bif\s*\(|>=|<=|==|!=|(?<![-<>])[<>](?![<>])")

    issues: List[Issue] = []
    total = len(lines)
    for idx, s in enumerate(lines, start=1):
        if not RE_PTHREAD_RET.search(s):
            continue
        nearby = " ".join(_safe_line(lines, i) for i in range(idx, min(idx + 2, total) + 1))
        if check_evidence.search(nearby):
            continue
        issues.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="pthread_ret_unchecked",
                file=relpath,
                line=idx,
                evidence=_strip_line(s),
                description="pthread 接口返回值可能未检查,错误处理缺失可能导致死锁/资源泄漏。",
                suggestion="检查 pthread 接口返回码并进行错误路径处理;必要时记录日志与清理资源。",
                confidence=0.6,
                severity="medium",
            )
        )
    return issues
|
|
1656
|
+
|
|
1657
|
+
|
|
1658
|
+
def _rule_cond_wait_no_loop(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report RE_PTHREAD_COND_WAIT matches that are not preceded by ``while (``.

    A call counts as guarded when ``while (`` appears either in the two
    lines before it (read through ``_safe_line``) or on the same line, to
    the left of the match. The same-line case covers the
    ``while (pred) pthread_cond_wait(...)`` form.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``cond_wait_no_loop`` issue per unguarded call.
    """
    loop_head = re.compile(r"\bwhile\s*\(")
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        call = RE_PTHREAD_COND_WAIT.search(text)
        if call is None:
            continue

        preceding = " ".join(_safe_line(lines, k) for k in range(max(1, line_no - 2), line_no))
        same_line_prefix = text[: call.start()]
        if loop_head.search(preceding) or loop_head.search(same_line_prefix):
            continue

        findings.append(
            Issue(
                language="c/cpp",
                category="thread_safety",
                pattern="cond_wait_no_loop",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="pthread_cond_wait 建议置于条件谓词的 while 循环中,以防止虚假唤醒。",
                suggestion="使用 while(predicate_not_satisfied) 包裹 pthread_cond_wait 调用并在唤醒后重新检查条件。",
                confidence=0.6,
                severity="medium",
            )
        )

    return findings
|
|
1694
|
+
|
|
1695
|
+
|
|
1696
|
+
def _rule_thread_leak_no_join(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report RE_PTHREAD_CREATE matches with no nearby join/detach of the same id.

    Capture group 1 of RE_PTHREAD_CREATE is taken as the thread identifier.
    The next 80 lines (clamped to the end of the file, starting on the line
    after the create call) are searched for an RE_PTHREAD_JOIN or
    RE_PTHREAD_DETACH match whose group 1 equals that identifier. If none
    is found an issue is emitted.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``thread_leak_no_join`` issue per create call
        lacking a matching join/detach in the window.
    """
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        created = RE_PTHREAD_CREATE.search(text)
        if created is None:
            continue

        thread_id = created.group(1)
        last = min(len(lines), line_no + 80)

        for k in range(line_no + 1, last + 1):
            candidate = _safe_line(lines, k)
            hits = (RE_PTHREAD_JOIN.search(candidate), RE_PTHREAD_DETACH.search(candidate))
            if any(hit and hit.group(1) == thread_id for hit in hits):
                break
        else:
            # The loop finished without finding a join/detach for this id.
            findings.append(
                Issue(
                    language="c/cpp",
                    category="resource_leak",
                    pattern="thread_leak_no_join",
                    file=relpath,
                    line=line_no,
                    evidence=_strip_line(text),
                    description=f"pthread_create 创建线程 {thread_id} 后的邻近窗口内未检测到 join/detach,可能导致线程泄漏或资源占用。",
                    suggestion="确保创建的线程被显式 join 或 detach;遵循统一的线程生命周期管理策略。",
                    confidence=0.6,
                    severity="medium",
                )
            )

    return findings
|
|
1734
|
+
|
|
1735
|
+
|
|
1736
|
+
def _rule_inet_legacy(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report every line on which RE_INET_LEGACY matches.

    Per the emitted issue text the rule targets legacy address-conversion
    calls such as inet_addr/inet_aton.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One low-severity ``inet_legacy`` issue per matching line.
    """
    return [
        Issue(
            language="c/cpp",
            category="network_api",
            pattern="inet_legacy",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="使用 inet_addr/inet_aton 等旧接口,错误语义模糊/不一致。",
            suggestion="使用 inet_pton/inet_ntop 进行地址转换,错误处理更可靠且支持 IPv6。",
            confidence=0.6,
            severity="low",
        )
        for line_no, text in enumerate(lines, start=1)
        if RE_INET_LEGACY.search(text)
    ]
|
|
1758
|
+
|
|
1759
|
+
|
|
1760
|
+
def _rule_time_apis_not_threadsafe(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report lines matched by RE_TIME_UNSAFE that contain no ``*_r(`` call.

    Per the emitted issue text the rule targets asctime/ctime/localtime/
    gmtime. Any ``_r`` followed by ``(`` anywhere on the line suppresses the
    report for that whole line.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``time_api_not_threadsafe`` issue per line.
    """
    reentrant_call = re.compile(r"_r\s*\(")
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        if not RE_TIME_UNSAFE.search(text) or reentrant_call.search(text):
            continue
        findings.append(
            Issue(
                language="c/cpp",
                category="thread_safety",
                pattern="time_api_not_threadsafe",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="使用 asctime/ctime/localtime/gmtime 等非重入接口,线程安全性不足。",
                suggestion="改用 *_r 线程安全版本(如 localtime_r/gmtime_r/ctime_r)。",
                confidence=0.6,
                severity="medium",
            )
        )

    return findings
|
|
1783
|
+
|
|
1784
|
+
|
|
1785
|
+
def _rule_getenv_unchecked(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report every line on which RE_GETENV matches.

    No surrounding validation is looked for: each match is reported as a
    potential unvalidated environment-variable read.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One medium-severity ``getenv_unchecked`` issue per matching line.
    """
    findings: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if RE_GETENV.search(text) is None:
            continue
        finding = Issue(
            language="c/cpp",
            category="input_validation",
            pattern="getenv_unchecked",
            file=relpath,
            line=line_no,
            evidence=_strip_line(text),
            description="读取环境变量后未见显式校验,可能被用于构造路径/命令等引入安全风险。",
            suggestion="对白名单键进行读取;对取值执行格式/长度/字符集校验;避免直接拼接为命令/路径。",
            confidence=0.55,
            severity="medium",
        )
        findings.append(finding)
    return findings
|
|
1807
|
+
|
|
1808
|
+
|
|
1809
|
+
# ---------------------------
|
|
1810
|
+
# C++ 特定检查规则
|
|
1811
|
+
# ---------------------------
|
|
1812
|
+
|
|
1813
|
+
def _rule_new_delete_mismatch(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report ``delete`` / ``delete[]`` used on a variable allocated the other way.

    Pass 1 records, by variable name, assignments of the form ``name = new``:
      * on lines matched by RE_NEW_ARRAY the name is recorded as
        array-allocated;
      * independently, when no ``[`` follows ``new`` on the line, the name
        is recorded as scalar-allocated.
    Pass 2 reports ``delete[] name`` for scalar-allocated names (on lines
    matched by RE_DELETE_ARRAY) and ``delete name`` for array-allocated
    names (on lines matched by RE_DELETE).

    Matching is by name across the whole file and is not flow-sensitive, so
    the relative order of allocation and release does not matter.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        High-severity ``delete_array_mismatch`` / ``delete_mismatch`` issues,
        in file order of the delete statements.
    """
    assigned_new = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*new\s+", re.IGNORECASE)
    assigned_scalar_new = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*new\s+(?!.*\[)", re.IGNORECASE)
    array_delete_target = re.compile(r"delete\s*\[\s*\]\s*([A-Za-z_]\w*)", re.IGNORECASE)
    scalar_delete_target = re.compile(r"delete\s+([A-Za-z_]\w*)", re.IGNORECASE)

    array_owned: dict[str, int] = {}   # name -> line of last new[] assignment
    scalar_owned: dict[str, int] = {}  # name -> line of last scalar new assignment

    # Pass 1: collect allocations.
    for line_no, text in enumerate(lines, start=1):
        if RE_NEW_ARRAY.search(text):
            target = assigned_new.search(text)
            if target:
                array_owned[target.group(1)] = line_no
        target = assigned_scalar_new.search(text)
        if target:
            scalar_owned[target.group(1)] = line_no

    # Pass 2: compare each release with the recorded allocation kind.
    findings: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if RE_DELETE_ARRAY.search(text):
            freed = array_delete_target.search(text)
            if freed and freed.group(1) in scalar_owned:
                findings.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="delete_array_mismatch",
                        file=relpath,
                        line=line_no,
                        evidence=_strip_line(text),
                        description="使用 delete[] 释放由 new 分配的内存(非数组),存在未定义行为风险。",
                        suggestion="new 分配的内存应使用 delete 释放;new[] 分配的内存应使用 delete[] 释放。",
                        confidence=0.85,
                        severity="high",
                    )
                )

        if RE_DELETE.search(text):
            freed = scalar_delete_target.search(text)
            if freed and freed.group(1) in array_owned:
                findings.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="delete_mismatch",
                        file=relpath,
                        line=line_no,
                        evidence=_strip_line(text),
                        description="使用 delete 释放由 new[] 分配的数组内存,存在未定义行为风险。",
                        suggestion="new[] 分配的内存应使用 delete[] 释放;new 分配的内存应使用 delete 释放。",
                        confidence=0.85,
                        severity="high",
                    )
                )

    return findings
|
|
1888
|
+
|
|
1889
|
+
|
|
1890
|
+
def _rule_reinterpret_cast_unsafe(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report every line on which RE_REINTERPRET_CAST matches.

    Confidence is 0.7, raised by 0.1 when the line also contains ``->`` or
    ``*`` (used as a hint that pointers are involved). Severity is always
    "high".

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One ``reinterpret_cast_unsafe`` issue per matching line.
    """
    findings: List[Issue] = []
    for line_no, text in enumerate(lines, start=1):
        if not RE_REINTERPRET_CAST.search(text):
            continue

        score = 0.7
        touches_pointer = "->" in text or "*" in text
        if touches_pointer:
            score += 0.1

        findings.append(
            Issue(
                language="c/cpp",
                category="type_safety",
                pattern="reinterpret_cast_unsafe",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="使用 reinterpret_cast 进行类型转换,可能导致未定义行为或类型安全问题。",
                suggestion="优先使用 static_cast 或 dynamic_cast;若必须使用 reinterpret_cast,需确保类型布局兼容并添加详细注释说明。",
                confidence=min(score, 0.9),
                severity="high",
            )
        )
    return findings
|
|
1916
|
+
|
|
1917
|
+
|
|
1918
|
+
def _rule_const_cast_unsafe(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report every line on which RE_CONST_CAST matches.

    Confidence is 0.65, raised by 0.1 when the line contains ``=`` and does
    not contain a ``const <identifier> *`` declaration. Severity is always
    "high".

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One ``const_cast_unsafe`` issue per matching line.
    """
    const_pointer_decl = re.compile(r"const\s+[A-Za-z_]\w*\s*\*")
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        if not RE_CONST_CAST.search(text):
            continue

        score = 0.65
        # An assignment whose target is not a const-qualified pointer hints
        # that the cast result is written through.
        if "=" in text and const_pointer_decl.search(text) is None:
            score += 0.1

        findings.append(
            Issue(
                language="c/cpp",
                category="type_safety",
                pattern="const_cast_unsafe",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="使用 const_cast 移除 const 修饰符,可能导致未定义行为(如修改常量对象)。",
                suggestion="避免使用 const_cast;若必须使用,确保仅用于移除非底层 const 且对象本身可变。",
                confidence=min(score, 0.8),
                severity="high",
            )
        )

    return findings
|
|
1944
|
+
|
|
1945
|
+
|
|
1946
|
+
def _rule_vector_string_bounds_check(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report ``var[...]`` accesses on known vector/string variables that have
    no nearby bounds check.

    Variable names are collected from capture group 1 of RE_VECTOR_VAR and
    RE_STRING_VAR over the whole file. A line is then reported for a
    variable when all of the following hold:
      * the line contains ``<var>[``;
      * RE_AT_METHOD does not match anywhere on the line;
      * the ``_window(lines, idx, before=2, after=2)`` text contains no
        ``<var>.size(`` / ``.length(`` / ``.empty(`` / ``.at(`` call.

    At most one vector issue and one string issue are emitted per line.
    Variables are tried in sorted name order, so which variable is named
    when several qualify is stable across runs; iterating the raw sets would
    depend on string hash order. Per-variable regexes are compiled once
    up front instead of once per line.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        Medium-severity ``vector_bounds_check`` / ``string_bounds_check``
        issues in file order (vector before string on the same line).
    """
    issues: List[Issue] = []
    vector_vars: set[str] = set()
    string_vars: set[str] = set()

    # Pass 1: collect declared vector and string variable names.
    for s in lines:
        m = RE_VECTOR_VAR.search(s)
        if m:
            vector_vars.add(m.group(1))
        m = RE_STRING_VAR.search(s)
        if m:
            string_vars.add(m.group(1))

    if not vector_vars and not string_vars:
        return issues

    def _matchers(names):
        """Return (name, subscript regex, bounds-check regex), sorted by name."""
        return [
            (
                name,
                re.compile(rf"\b{re.escape(name)}\s*\["),
                re.compile(rf"\b{re.escape(name)}\s*\.(size|length|empty|at)\s*\(", re.IGNORECASE),
            )
            for name in sorted(names)
        ]

    groups = (
        ("vector", "vector_bounds_check", _matchers(vector_vars)),
        ("string", "string_bounds_check", _matchers(string_vars)),
    )

    # Pass 2: inspect subscript accesses.
    for idx, s in enumerate(lines, start=1):
        # A .at() call anywhere on the line suppresses every report for it.
        if RE_AT_METHOD.search(s):
            continue

        window_text = None  # built lazily, at most once per line
        for kind, pattern_name, matchers in groups:
            for name, subscript_re, guard_re in matchers:
                if not subscript_re.search(s):
                    continue
                if window_text is None:
                    window_text = " ".join(t for _, t in _window(lines, idx, before=2, after=2))
                if guard_re.search(window_text):
                    continue
                issues.append(
                    Issue(
                        language="c/cpp",
                        category="buffer_overflow",
                        pattern=pattern_name,
                        file=relpath,
                        line=idx,
                        evidence=_strip_line(s),
                        description=f"{kind} {name} 使用 [] 访问可能越界,建议使用 .at() 进行边界检查。",
                        suggestion="使用 .at() 方法进行安全访问,或在使用 [] 前显式检查索引范围。",
                        confidence=0.6,
                        severity="medium",
                    )
                )
                break  # report each container kind at most once per line
    return issues
|
|
2011
|
+
|
|
2012
|
+
|
|
2013
|
+
def _rule_missing_virtual_dtor(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report classes that declare virtual functions but no virtual destructor.

    A line matched by RE_CLASS_DECL starts tracking the class named by
    capture group 1 and resets the brace depth to that line's
    ``{`` minus ``}`` count. The header line itself is not inspected for
    members. On following lines the depth is updated, and tracking stops as
    soon as it drops to zero or below. While tracking, a line containing
    ``virtual`` followed by whitespace and a non-``~`` character marks the
    class as having virtual functions, and an RE_VIRTUAL_DTOR match marks
    it as having a virtual destructor.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One high-severity ``missing_virtual_dtor`` issue per offending
        class, located at the class declaration line.
    """
    # class name -> {"line": int, "has_virtual": bool, "has_virtual_dtor": bool}
    records: dict[str, dict] = {}
    active: Optional[str] = None
    inside = False
    depth = 0

    for line_no, text in enumerate(lines, start=1):
        brace_delta = text.count("{") - text.count("}")

        decl = RE_CLASS_DECL.search(text)
        if decl is not None:
            active = decl.group(1)
            records[active] = {"line": line_no, "has_virtual": False, "has_virtual_dtor": False}
            inside = True
            depth = brace_delta
            continue

        if not (inside and active):
            continue

        depth += brace_delta
        if depth <= 0:
            # The class body is closed, or it never opened on this line.
            inside = False
            active = None
            continue

        entry = records[active]
        if re.search(r"\bvirtual\s+[^~]", text, re.IGNORECASE):
            entry["has_virtual"] = True
        if RE_VIRTUAL_DTOR.search(text):
            entry["has_virtual_dtor"] = True

    return [
        Issue(
            language="c/cpp",
            category="memory_mgmt",
            pattern="missing_virtual_dtor",
            file=relpath,
            line=info["line"],
            evidence=_strip_line(_safe_line(lines, info["line"])),
            description=f"类 {name} 包含虚函数但析构函数非虚,通过基类指针删除派生类对象可能导致未定义行为。",
            suggestion="为基类添加虚析构函数,确保通过基类指针删除派生类对象时正确调用派生类析构函数。",
            confidence=0.75,
            severity="high",
        )
        for name, info in records.items()
        if info["has_virtual"] and not info["has_virtual_dtor"]
    ]
|
|
2069
|
+
|
|
2070
|
+
|
|
2071
|
+
def _rule_move_after_use(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report uses of a variable shortly after it was recorded as moved.

    Capture group 1 of RE_MOVE_ASSIGN is recorded with its line number. For
    the next 10 lines, each line mentioning that variable is classified:
      * ``var = <expr>`` where the right-hand side is not ``std::move``
        counts as a reassignment, and the variable is dropped from tracking;
      * any other line containing ``var =`` (for example ``var == x``) is
        ignored, as before;
      * ``var`` followed by ``->``, ``[``, ``.``, ``(`` or ``,`` is reported
        as a use-after-move, and the variable is dropped so it is reported
        only once.

    Reassignments previously never reset the moved state: that check sat
    under a condition requiring that the line contain no ``var =``, so it
    could not be reached. The lookahead is also placed directly after ``=``
    so that optional whitespace cannot be backtracked over to accept
    ``var = std::move(...)``. Entries older than the 10-line window are
    pruned because they can no longer produce a report.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        High-severity ``move_after_use`` issues in file order.
    """
    issues: List[Issue] = []
    moved_vars: dict[str, int] = {}  # var -> line number of the recorded move

    for idx, s in enumerate(lines, start=1):
        m = RE_MOVE_ASSIGN.search(s)
        if m:
            moved_vars[m.group(1)] = idx

        # Removals are collected first so the dict is not mutated mid-iteration.
        vars_to_remove: set[str] = set()
        for var, move_line in moved_vars.items():
            if idx > move_line + 10:
                vars_to_remove.add(var)  # window expired
                continue
            if idx <= move_line:
                continue

            name = re.escape(var)
            if not re.search(rf"\b{name}\b", s):
                continue

            if re.search(rf"\b{name}\s*=", s):
                # Reset only on a real assignment: not "==" and not "= std::move".
                if re.search(rf"\b{name}\s*=(?!=)(?!\s*std::move)", s):
                    vars_to_remove.add(var)
                continue

            if re.search(rf"\b{name}\s*(->|\[|\.|\(|,)", s):
                issues.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="move_after_use",
                        file=relpath,
                        line=idx,
                        evidence=_strip_line(s),
                        description=f"变量 {var} 在 std::move 后仍被使用,移动后的对象处于有效但未指定状态,可能导致未定义行为。",
                        suggestion="移动后的对象不应再使用,除非重新赋值;考虑使用移动语义后立即停止使用该对象。",
                        confidence=0.7,
                        severity="high",
                    )
                )
                vars_to_remove.add(var)

        for var in vars_to_remove:
            moved_vars.pop(var, None)

    return issues
|
|
2120
|
+
|
|
2121
|
+
|
|
2122
|
+
def _rule_uncaught_exception(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Report RE_THROW matches that have no try/catch pair nearby.

    The ``_window(lines, idx, before=10, after=10)`` text must match both
    RE_TRY and RE_CATCH for the throw to be considered handled. Otherwise
    an issue is emitted with confidence 0.6, raised by 0.2 (and severity
    raised from "medium" to "high") when RE_NOEXCEPT matches in the
    ``before=5, after=0`` window.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in each issue's ``file`` field.

    Returns:
        One ``uncaught_exception`` issue per unhandled throw line.
    """
    findings: List[Issue] = []

    for line_no, text in enumerate(lines, start=1):
        if not RE_THROW.search(text):
            continue

        surrounding = " ".join(chunk for _, chunk in _window(lines, line_no, before=10, after=10))
        if RE_TRY.search(surrounding) and RE_CATCH.search(surrounding):
            continue

        score = 0.6
        leading = " ".join(chunk for _, chunk in _window(lines, line_no, before=5, after=0))
        if RE_NOEXCEPT.search(leading):
            score += 0.2

        findings.append(
            Issue(
                language="c/cpp",
                category="error_handling",
                pattern="uncaught_exception",
                file=relpath,
                line=line_no,
                evidence=_strip_line(text),
                description="检测到 throw 语句,但附近未见 try-catch 块,可能导致未捕获异常。",
                suggestion="确保异常在适当的作用域内被捕获;考虑使用 RAII 确保资源在异常时正确释放。",
                confidence=min(score, 0.85),
                severity="medium" if score < 0.8 else "high",
            )
        )

    return findings
|
|
2156
|
+
|
|
2157
|
+
|
|
2158
|
+
def _rule_smart_ptr_cycle(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Emit a single advisory when a file uses many shared_ptr variables and
    never mentions weak_ptr.

    Variable names are taken from capture group 1 of RE_SMART_PTR_ASSIGN on
    lines that also match RE_SHARED_PTR. When more than three distinct names
    are found and no line matches RE_WEAK_PTR, one issue is reported at the
    first line matching RE_SHARED_PTR. This is a coarse heuristic; no
    reference graph is built.

    A per-line loop over all collected variables that only executed ``pass``
    has been removed: it could not affect the result and cost a regex
    search per variable per line.

    Args:
        lines: Source lines of one file; reported line numbers are 1-based.
        relpath: Path stored in the issue's ``file`` field.

    Returns:
        A list holding zero or one medium-severity ``smart_ptr_cycle_risk``
        issue.
    """
    issues: List[Issue] = []
    shared_ptr_vars: set[str] = set()
    first_hit = None  # (line number, text) of the first shared_ptr line

    for idx, s in enumerate(lines, start=1):
        if not RE_SHARED_PTR.search(s):
            continue
        if first_hit is None:
            first_hit = (idx, s)
        m = RE_SMART_PTR_ASSIGN.search(s)
        if m:
            shared_ptr_vars.add(m.group(1))

    if len(shared_ptr_vars) <= 3 or first_hit is None:
        return issues

    # Any weak_ptr usage is taken as a sign that cycles are being handled.
    if any(RE_WEAK_PTR.search(s) for s in lines):
        return issues

    idx, s = first_hit
    issues.append(
        Issue(
            language="c/cpp",
            category="memory_mgmt",
            pattern="smart_ptr_cycle_risk",
            file=relpath,
            line=idx,
            evidence=_strip_line(s),
            description="检测到多个 shared_ptr 使用但未见 weak_ptr,可能存在循环引用导致内存泄漏的风险。",
            suggestion="检查对象间的引用关系,必要时使用 weak_ptr 打破循环引用;考虑使用 unique_ptr 替代 shared_ptr 以明确所有权。",
            confidence=0.5,
            severity="medium",
        )
    )
    return issues
|
|
2212
|
+
|
|
2213
|
+
|
|
2214
|
+
def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Heuristically detect deadlock risks around C++ standard-library mutexes.

    Reported patterns:
    - ``cpp_double_lock``: a tracked mutex is locked again while it is still
      on the held-lock stack.
    - ``cpp_multiple_lock_unsafe``: a ``lock()`` call while more than one lock
      is held, with no ``std::lock``/``scoped_lock`` on that line or on the
      three lines before it.
    - ``cpp_lock_order_inversion``: both (A -> B) and (B -> A) acquisition
      orders were observed.
    - ``cpp_missing_unlock_suspect``: no matching ``unlock()`` within the 50
      lines after a ``lock()`` and no RAII guard nearby.

    The scan is line-based and does not model scopes or control flow, so
    false positives are expected.

    Args:
        lines: Source lines of one file.
        relpath: Path recorded in the ``file`` field of each issue.

    Returns:
        Issues in this order: double-lock / multi-lock findings by line, then
        order inversions, then missing-unlock suspects.
    """
    issues: List[Issue] = []
    lock_stack: list[str] = []  # mutexes currently considered held, oldest first
    order_pairs: dict[tuple[str, str], int] = {}  # (held, acquired) -> first line seen
    mutex_vars: set[str] = set()  # names declared as mutexes

    # Collect every declared mutex name first.
    for s in lines:
        m = RE_STD_MUTEX.search(s)
        if m:
            mutex_vars.add(m.group(1))

    # Every check below only acts on names in mutex_vars.
    if not mutex_vars:
        return issues

    # Walk lock()/unlock() calls in file order.
    for idx, s in enumerate(lines, start=1):
        m_lock = RE_MUTEX_LOCK.search(s)
        if m_lock:
            mtx = m_lock.group(1)
            if mtx in mutex_vars:
                if mtx in lock_stack:
                    issues.append(
                        Issue(
                            language="c/cpp",
                            category="error_handling",
                            pattern="cpp_double_lock",
                            file=relpath,
                            line=idx,
                            evidence=_strip_line(s),
                            description=f"mutex {mtx} 在未解锁的情况下被再次加锁,存在死锁风险。",
                            suggestion="避免对同一 mutex 重复加锁;考虑使用 std::recursive_mutex 或重构代码避免嵌套加锁。",
                            confidence=0.8,
                            severity="high",
                        )
                    )
                # Remember the acquisition order relative to the innermost held lock.
                if lock_stack and lock_stack[-1] != mtx:
                    order_pairs.setdefault((lock_stack[-1], mtx), idx)
                lock_stack.append(mtx)

        m_unlock = RE_MUTEX_UNLOCK.search(s)
        if m_unlock:
            mtx = m_unlock.group(1)
            # Only tracked mutexes are ever pushed, so stack membership is enough.
            if mtx in lock_stack:
                # Release the most recent acquisition of this mutex.
                for k in reversed(range(len(lock_stack))):
                    if lock_stack[k] == mtx:
                        del lock_stack[k]
                        break

        # Manual locking while several locks are held, without std::lock /
        # scoped_lock on this line or the three lines above it.
        # NOTE(review): this fires for any lock() match, including names that
        # are not in mutex_vars, and lock_stack is never reset at scope end.
        # Confirm whether that is intended.
        if m_lock and len(lock_stack) > 1:
            has_safe_lock = RE_STD_LOCK.search(s) or RE_SCOPED_LOCK.search(s)
            if not has_safe_lock:
                # len(lock_stack) > 1 cannot occur on line 1, so the window is never empty.
                prev_text = " ".join(
                    _safe_line(lines, j) for j in range(max(1, idx - 3), idx)
                )
                if not RE_STD_LOCK.search(prev_text) and not RE_SCOPED_LOCK.search(prev_text):
                    issues.append(
                        Issue(
                            language="c/cpp",
                            category="error_handling",
                            pattern="cpp_multiple_lock_unsafe",
                            file=relpath,
                            line=idx,
                            evidence=_strip_line(s),
                            description="检测到手动锁定多个 mutex 但未使用 std::lock 或 std::scoped_lock,存在死锁风险。",
                            suggestion="使用 std::lock 或 std::scoped_lock 同时锁定多个 mutex,可避免死锁;或统一加锁顺序。",
                            confidence=0.65,
                            severity="high",
                        )
                    )

    # Lock-order inversion: each direction of an inverted pair is reported
    # once, anchored at the line where the opposite order was first seen.
    for a, b in order_pairs:
        if (b, a) in order_pairs:
            inverse_line = order_pairs[(b, a)]
            issues.append(
                Issue(
                    language="c/cpp",
                    category="error_handling",
                    pattern="cpp_lock_order_inversion",
                    file=relpath,
                    line=inverse_line,
                    evidence=_strip_line(_safe_line(lines, inverse_line)),
                    description=f"检测到 mutex 加锁顺序反转:({a} -> {b}) 与 ({b} -> {a}),存在死锁风险。",
                    suggestion="统一多锁的获取顺序,制定全局锁等级;或使用 std::lock/scoped_lock 避免死锁。",
                    confidence=0.7,
                    severity="high",
                )
            )

    # Possibly missing unlock: no matching unlock() within 50 lines of lock().
    for idx, s in enumerate(lines, start=1):
        m_lock = RE_MUTEX_LOCK.search(s)
        if not m_lock:
            continue
        mtx = m_lock.group(1)
        if mtx not in mutex_vars:
            continue

        # An RAII guard on this line or the next three releases automatically.
        window_text = " ".join(
            _safe_line(lines, j) for j in range(idx, min(idx + 3, len(lines)) + 1)
        )
        if (
            RE_LOCK_GUARD.search(window_text)
            or RE_UNIQUE_LOCK.search(window_text)
            or RE_SHARED_LOCK.search(window_text)
        ):
            continue

        end = min(len(lines), idx + 50)
        unlocked = False
        for j in range(idx + 1, end + 1):
            sj = _safe_line(lines, j)
            m_un = RE_MUTEX_UNLOCK.search(sj)
            if m_un and m_un.group(1) == mtx:
                unlocked = True
                break
            # A closing brace shortly after a lock_guard/unique_lock is treated
            # as the end of an RAII scope.
            if "}" in sj:
                prev_scope = " ".join(
                    _safe_line(lines, k) for k in range(max(1, j - 5), j)
                )
                if RE_LOCK_GUARD.search(prev_scope) or RE_UNIQUE_LOCK.search(prev_scope):
                    unlocked = True
                    break

        if not unlocked:
            issues.append(
                Issue(
                    language="c/cpp",
                    category="error_handling",
                    pattern="cpp_missing_unlock_suspect",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"在 mutex {mtx} 调用 lock() 之后的邻近窗口内未检测到匹配 unlock(),可能存在缺失解锁的风险。",
                    suggestion="确保所有 lock() 路径都有配对的 unlock();考虑使用 std::lock_guard 或 std::unique_lock(RAII)自动管理锁生命周期。",
                    confidence=0.55,
                    severity="medium",
                )
            )

    return issues
|
|
2374
|
+
|
|
2375
|
+
|
|
2376
|
+
def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Heuristically flag shared variables accessed without visible locking.

    Nothing is reported unless the file creates a thread (pthread or
    ``std::thread`` pattern). When it does:

    - ``data_race_suspect``: a non-atomic static / extern / file-scope
      variable appears on a line that has no lock within five lines either
      side, or whose nearest preceding lock (up to ten lines back) has
      already been unlocked.
    - ``volatile_not_threadsafe``: a non-atomic ``volatile`` variable appears
      within 20 lines of a thread creation with no lock in the three lines
      either side.

    The scan is line-based; findings need manual review.

    Args:
        lines: Source lines of one file.
        relpath: Path recorded in the ``file`` field of each issue.

    Returns:
        All ``data_race_suspect`` issues in line order, followed by all
        ``volatile_not_threadsafe`` issues in line order. Variables sharing a
        line are reported in sorted name order.
    """
    issues: List[Issue] = []
    shared_vars: set[str] = set()
    thread_creation_lines: list[int] = []
    atomic_vars: set[str] = set()
    volatile_vars: set[str] = set()

    # File-scope "type name =|;" heuristic. The separator accepts the same
    # strings as the earlier `(?:\s+\*|\s+)+` form (whitespace-led, no two
    # adjacent '*') but has one way to match each, so a long run of blanks
    # cannot trigger exponential backtracking.
    global_decl = re.compile(r"^[A-Za-z_]\w*\s+(?:\*\s+)*\*?([A-Za-z_]\w*)\s*[=;]")

    # Pass 1: collect shared variables, thread creations, atomics, volatiles.
    for idx, s in enumerate(lines, start=1):
        lowered = s.lower()

        # Lines mentioning "const" are treated as read-only and skipped.
        if "const" not in lowered:
            m_static = RE_STATIC_VAR.search(s)
            if m_static:
                shared_vars.add(m_static.group(1))

            m_extern = RE_EXTERN_VAR.search(s)
            if m_extern:
                shared_vars.add(m_extern.group(1))

            # The first line, or a line right after one ending in "}", is
            # taken to be at file scope.
            if "static" not in lowered and (
                idx == 1 or _safe_line(lines, idx - 1).strip().endswith("}")
            ):
                m_global = global_decl.search(s)
                if m_global:
                    shared_vars.add(m_global.group(1))

        if RE_PTHREAD_CREATE.search(s) or RE_STD_THREAD.search(s):
            thread_creation_lines.append(idx)

        m_atomic = RE_ATOMIC.search(s)
        if m_atomic:
            atomic_vars.add(m_atomic.group(1))

        m_volatile = RE_VOLATILE.search(s)
        if m_volatile:
            volatile_vars.add(m_volatile.group(1))

    # Without thread creation there is nothing to race against.
    if not thread_creation_lines:
        return issues

    window_lock_probes = (
        RE_PTHREAD_LOCK,
        RE_MUTEX_LOCK,
        RE_LOCK_GUARD,
        RE_UNIQUE_LOCK,
        RE_SHARED_LOCK,
    )
    nearest_lock_probes = (RE_PTHREAD_LOCK, RE_MUTEX_LOCK, RE_LOCK_GUARD, RE_UNIQUE_LOCK)

    # One compiled word-boundary pattern per variable, built once. Sorting
    # keeps the output order independent of set iteration order.
    access_patterns = [
        (var, re.compile(rf"\b{re.escape(var)}\b"))
        for var in sorted(shared_vars - atomic_vars)
    ]

    # Pass 2: check how each shared-variable access is protected.
    for idx, s in enumerate(lines, start=1):
        accessed = [var for var, pattern in access_patterns if pattern.search(s)]
        if not accessed:
            continue

        window_text = " ".join(t for _, t in _window(lines, idx, before=5, after=5))
        has_lock = any(rx.search(window_text) for rx in window_lock_probes)

        # Find the nearest preceding lock, scanning backward from the access
        # so an older lock/unlock pair cannot mask a closer, still-held lock.
        lock_line = None
        for j in range(idx - 1, max(1, idx - 10) - 1, -1):
            sj = _safe_line(lines, j)
            if any(rx.search(sj) for rx in nearest_lock_probes):
                lock_line = j
                break

        # Was that lock released before the access?
        unlocked = False
        if lock_line is not None:
            for j in range(lock_line + 1, idx):
                sj = _safe_line(lines, j)
                if RE_PTHREAD_UNLOCK.search(sj) or RE_MUTEX_UNLOCK.search(sj):
                    unlocked = True
                    break

        if has_lock and not unlocked:
            continue  # looks protected

        assign = RE_VAR_ASSIGN.search(s)
        lhs = s[: s.find("=")]
        looks_like_call = "(" in s and ")" in s

        for var in accessed:
            conf = 0.6
            if assign and var in lhs:
                conf += 0.15  # writes are riskier than reads
            if var in volatile_vars:
                conf += 0.1  # volatile hints at intended, but unsafe, sharing
            if looks_like_call:
                conf -= 0.1  # may just be a call argument or parameter

            issues.append(
                Issue(
                    language="c/cpp",
                    category="concurrency",
                    pattern="data_race_suspect",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"共享变量 {var} 在多线程环境下访问但未见明确的锁保护,可能存在数据竞争风险。",
                    suggestion="使用互斥锁保护共享变量访问;或使用原子操作(std::atomic)进行无锁编程;注意 volatile 不能保证线程安全。",
                    confidence=min(conf, 0.85),
                    severity="high" if conf >= 0.7 else "medium",
                )
            )

    # Pass 3: volatile used as if it made access thread-safe.
    volatile_patterns = [
        (var, re.compile(rf"\b{re.escape(var)}\b"))
        for var in sorted(volatile_vars - atomic_vars)
    ]
    if not volatile_patterns:
        return issues

    for idx, s in enumerate(lines, start=1):
        mentioned = [var for var, pattern in volatile_patterns if pattern.search(s)]
        if not mentioned:
            continue

        window_text = " ".join(t for _, t in _window(lines, idx, before=3, after=3))
        has_thread = (
            RE_PTHREAD_CREATE.search(window_text) is not None
            or RE_STD_THREAD.search(window_text) is not None
            or any(abs(j - idx) < 20 for j in thread_creation_lines)
        )
        if not has_thread:
            continue

        has_lock = (
            RE_PTHREAD_LOCK.search(window_text) is not None
            or RE_MUTEX_LOCK.search(window_text) is not None
            or RE_LOCK_GUARD.search(window_text) is not None
        )
        if has_lock:
            continue

        for var in mentioned:
            issues.append(
                Issue(
                    language="c/cpp",
                    category="concurrency",
                    pattern="volatile_not_threadsafe",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"volatile 变量 {var} 在多线程环境下使用,但 volatile 不能保证线程安全,可能存在数据竞争。",
                    suggestion="volatile 仅防止编译器优化,不能保证原子性或内存可见性;使用 std::atomic 或互斥锁保护共享变量。",
                    confidence=0.7,
                    severity="high",
                )
            )

    return issues
|
|
2548
|
+
|
|
2549
|
+
|
|
2550
|
+
def _rule_smart_ptr_get_unsafe(lines: Sequence[str], relpath: str) -> List[Issue]:
    """
    Flag ``.get()`` calls on smart pointers, whose raw result may dangle.

    Smart-pointer variable names are gathered from smart-pointer assignments
    and from lines mentioning ``shared_ptr`` / ``unique_ptr`` / ``weak_ptr``
    (first identifier followed by ``=`` or ``;``). Each line calling
    ``<name>.get(`` on one of them yields one ``smart_ptr_get_unsafe`` issue;
    confidence is raised when the line contains ``=`` or the call result is
    followed by ``=``, ``,`` or ``(``.

    Args:
        lines: Source lines of one file.
        relpath: Path recorded in the ``file`` field of each issue.

    Returns:
        At most one issue per line, in line order. When several tracked
        variables call ``.get()`` on one line, the alphabetically first is
        named in the description.
    """
    issues: List[Issue] = []
    smart_ptr_vars: set[str] = set()
    declared_name = re.compile(r"\b([A-Za-z_]\w*)\s*(?:=|;)")

    # Collect smart-pointer variable names.
    for s in lines:
        m = RE_SMART_PTR_ASSIGN.search(s)
        if m:
            smart_ptr_vars.add(m.group(1))
        # Declarations count as well.
        if RE_SHARED_PTR.search(s) or RE_UNIQUE_PTR.search(s) or RE_WEAK_PTR.search(s):
            m = declared_name.search(s)
            if m:
                smart_ptr_vars.add(m.group(1))

    if not smart_ptr_vars:
        return issues

    # C++ identifiers and the `get` member are case-sensitive, so the match is
    # case-sensitive too; otherwise `p` would also flag `P.get()`.
    # Patterns are compiled once, in sorted order, for deterministic output.
    get_calls = [
        (var, re.compile(rf"\b{re.escape(var)}\s*\.get\s*\("))
        for var in sorted(smart_ptr_vars)
    ]
    result_escapes = re.compile(r"\.get\s*\([^)]*\)\s*[=,\(]")

    for idx, s in enumerate(lines, start=1):
        if "get" not in s:
            continue  # cheap pre-filter before running the per-variable patterns
        for var, pattern in get_calls:
            if not pattern.search(s):
                continue

            conf = 0.65
            # Storing or forwarding the raw pointer is riskier.
            if "=" in s or result_escapes.search(s):
                conf += 0.1

            issues.append(
                Issue(
                    language="c/cpp",
                    category="memory_mgmt",
                    pattern="smart_ptr_get_unsafe",
                    file=relpath,
                    line=idx,
                    evidence=_strip_line(s),
                    description=f"智能指针 {var} 使用 .get() 方法获取原始指针,若智能指针生命周期结束,原始指针将悬空。",
                    suggestion="避免存储 .get() 返回的原始指针;若必须使用,确保智能指针的生命周期覆盖原始指针的使用期。",
                    confidence=min(conf, 0.8),
                    severity="high",
                )
            )
            break  # report each line only once
    return issues
|
|
2593
|
+
|
|
2594
|
+
|
|
2595
|
+
def analyze_c_cpp_text(relpath: str, text: str) -> List[Issue]:
    """
    Run every C/C++ heuristic rule over ``text`` and collect the findings.

    The text is preprocessed with ``_strip_if0_blocks`` and then
    ``_remove_comments_preserve_strings``. Two line views are derived from
    the result:

    - raw lines, which keep string-literal contents, for rules that must read
      literals (format strings, scanf widths, and so on);
    - masked lines, produced by ``_mask_strings_preserve_len``, for generic
      API/keyword matching so literal contents are not mistaken for code.

    Args:
        relpath: Path recorded in the ``file`` field of each issue.
        text: Full source text of one file.

    Returns:
        Issues from all rules, concatenated in rule order.
    """
    comment_free = _remove_comments_preserve_strings(_strip_if0_blocks(text))
    raw_lines = comment_free.splitlines()
    masked_lines = _mask_strings_preserve_len(comment_free).splitlines()

    # Rule groups in execution order, each paired with the view it reads.
    stages = (
        # Generic API / keyword matching.
        (
            masked_lines,
            (
                _rule_unsafe_api,
                _rule_boundary_funcs,
                _rule_realloc_assign_back,
                _rule_malloc_no_null_check,
                _rule_unchecked_io,
            ),
        ),
        # Rules that need string-literal contents.
        (
            raw_lines,
            (
                _rule_strncpy_no_nullterm,
                _rule_format_string,
                _rule_scanf_no_width,
            ),
        ),
        # Remaining rules, semantic checks, C++-specific checks, C++ deadlock
        # detection and data-race detection.
        (
            masked_lines,
            (
                _rule_insecure_tmpfile,
                _rule_command_execution,
                _rule_alloc_size_overflow,
                _rule_double_free_and_free_non_heap,
                _rule_atoi_family,
                _rule_rand_insecure,
                _rule_strtok_nonreentrant,
                _rule_open_permissive_perms,
                _rule_alloca_unbounded,
                _rule_vla_usage,
                _rule_pthread_returns_unchecked,
                _rule_cond_wait_no_loop,
                _rule_thread_leak_no_join,
                _rule_inet_legacy,
                _rule_time_apis_not_threadsafe,
                _rule_getenv_unchecked,
                _rule_uaf_suspect,
                _rule_possible_null_deref,
                _rule_uninitialized_ptr_use,
                _rule_deadlock_patterns,
                _rule_new_delete_mismatch,
                _rule_reinterpret_cast_unsafe,
                _rule_const_cast_unsafe,
                _rule_vector_string_bounds_check,
                _rule_missing_virtual_dtor,
                _rule_move_after_use,
                _rule_uncaught_exception,
                _rule_smart_ptr_cycle,
                _rule_smart_ptr_get_unsafe,
                _rule_cpp_deadlock_patterns,
                _rule_data_race_suspect,
            ),
        ),
    )

    found: List[Issue] = []
    for view, rules in stages:
        for rule in rules:
            found.extend(rule(view, relpath))
    return found
|
|
2658
|
+
|
|
2659
|
+
|
|
2660
|
+
def analyze_c_cpp_file(base: Path, relpath: Path) -> List[Issue]:
    """
    Load ``base / relpath`` from disk and analyze its contents.

    Undecodable bytes are dropped while reading. Any failure to build the
    path or read the file is swallowed and yields an empty result.

    Args:
        base: Directory that ``relpath`` is resolved against.
        relpath: File path relative to ``base``; its string form is passed on
            as the path to report.

    Returns:
        The issues found by ``analyze_c_cpp_text``, or ``[]`` if the file
        could not be read.
    """
    try:
        source = (base / relpath).read_text(errors="ignore")
    except Exception:
        # Best effort: an unreadable file contributes no findings.
        return []
    else:
        return analyze_c_cpp_text(str(relpath), source)
|
|
2669
|
+
|
|
2670
|
+
|
|
2671
|
+
def analyze_files(base_path: str, files: Iterable[str]) -> List[Issue]:
    """
    Analyze a batch of files given as paths relative to ``base_path``.

    Args:
        base_path: Root directory; it is resolved once before any file is read.
        files: Relative file paths to analyze.

    Returns:
        Issues from all files, concatenated in the order ``files`` yields them.
    """
    root = Path(base_path).resolve()
    return [
        issue
        for name in files
        for issue in analyze_c_cpp_file(root, Path(name))
    ]
|