jarvis-ai-assistant 0.7.8__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +567 -222
  3. jarvis/jarvis_agent/agent_manager.py +19 -12
  4. jarvis/jarvis_agent/builtin_input_handler.py +79 -11
  5. jarvis/jarvis_agent/config_editor.py +7 -2
  6. jarvis/jarvis_agent/event_bus.py +24 -13
  7. jarvis/jarvis_agent/events.py +19 -1
  8. jarvis/jarvis_agent/file_context_handler.py +67 -64
  9. jarvis/jarvis_agent/file_methodology_manager.py +38 -24
  10. jarvis/jarvis_agent/jarvis.py +186 -114
  11. jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
  12. jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
  13. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
  14. jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
  15. jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
  16. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
  17. jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
  18. jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
  19. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
  20. jarvis/jarvis_agent/language_support_info.py +250 -219
  21. jarvis/jarvis_agent/main.py +19 -23
  22. jarvis/jarvis_agent/memory_manager.py +9 -6
  23. jarvis/jarvis_agent/methodology_share_manager.py +21 -15
  24. jarvis/jarvis_agent/output_handler.py +4 -2
  25. jarvis/jarvis_agent/prompt_builder.py +7 -6
  26. jarvis/jarvis_agent/prompt_manager.py +113 -8
  27. jarvis/jarvis_agent/prompts.py +317 -85
  28. jarvis/jarvis_agent/protocols.py +5 -2
  29. jarvis/jarvis_agent/run_loop.py +192 -32
  30. jarvis/jarvis_agent/session_manager.py +7 -3
  31. jarvis/jarvis_agent/share_manager.py +23 -13
  32. jarvis/jarvis_agent/shell_input_handler.py +12 -8
  33. jarvis/jarvis_agent/stdio_redirect.py +25 -26
  34. jarvis/jarvis_agent/task_analyzer.py +29 -23
  35. jarvis/jarvis_agent/task_list.py +869 -0
  36. jarvis/jarvis_agent/task_manager.py +26 -23
  37. jarvis/jarvis_agent/tool_executor.py +6 -5
  38. jarvis/jarvis_agent/tool_share_manager.py +24 -14
  39. jarvis/jarvis_agent/user_interaction.py +3 -3
  40. jarvis/jarvis_agent/utils.py +9 -1
  41. jarvis/jarvis_agent/web_bridge.py +37 -17
  42. jarvis/jarvis_agent/web_output_sink.py +5 -2
  43. jarvis/jarvis_agent/web_server.py +165 -36
  44. jarvis/jarvis_c2rust/__init__.py +1 -1
  45. jarvis/jarvis_c2rust/cli.py +260 -141
  46. jarvis/jarvis_c2rust/collector.py +37 -18
  47. jarvis/jarvis_c2rust/constants.py +60 -0
  48. jarvis/jarvis_c2rust/library_replacer.py +242 -1010
  49. jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
  50. jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
  51. jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
  52. jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
  53. jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
  54. jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
  55. jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
  56. jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
  57. jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
  58. jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
  59. jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
  60. jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
  61. jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
  62. jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
  63. jarvis/jarvis_c2rust/loaders.py +28 -10
  64. jarvis/jarvis_c2rust/models.py +5 -2
  65. jarvis/jarvis_c2rust/optimizer.py +192 -1974
  66. jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
  67. jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
  68. jarvis/jarvis_c2rust/optimizer_config.py +49 -0
  69. jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
  70. jarvis/jarvis_c2rust/optimizer_options.py +48 -0
  71. jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
  72. jarvis/jarvis_c2rust/optimizer_report.py +52 -0
  73. jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
  74. jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
  75. jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
  76. jarvis/jarvis_c2rust/scanner.py +229 -166
  77. jarvis/jarvis_c2rust/transpiler.py +531 -2732
  78. jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
  79. jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
  80. jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
  81. jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
  82. jarvis/jarvis_c2rust/transpiler_config.py +178 -0
  83. jarvis/jarvis_c2rust/transpiler_context.py +122 -0
  84. jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
  85. jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
  86. jarvis/jarvis_c2rust/transpiler_git.py +163 -0
  87. jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
  88. jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
  89. jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
  90. jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
  91. jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
  92. jarvis/jarvis_c2rust/utils.py +269 -79
  93. jarvis/jarvis_code_agent/after_change.py +233 -0
  94. jarvis/jarvis_code_agent/build_validation_config.py +37 -30
  95. jarvis/jarvis_code_agent/builtin_rules.py +68 -0
  96. jarvis/jarvis_code_agent/code_agent.py +976 -1517
  97. jarvis/jarvis_code_agent/code_agent_build.py +227 -0
  98. jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
  99. jarvis/jarvis_code_agent/code_agent_git.py +525 -0
  100. jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
  101. jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
  102. jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
  103. jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
  104. jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
  105. jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
  106. jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
  107. jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
  108. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
  109. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
  110. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
  111. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
  112. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
  113. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
  114. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
  115. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
  116. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
  117. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
  118. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
  119. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
  120. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
  121. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
  122. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
  123. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
  124. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
  125. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
  126. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
  127. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
  128. jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
  129. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
  130. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
  131. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
  132. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
  133. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
  134. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
  135. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
  136. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
  137. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
  138. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
  139. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
  140. jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
  141. jarvis/jarvis_code_agent/lint.py +223 -524
  142. jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
  143. jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
  144. jarvis/jarvis_code_agent/rules/code_review.md +115 -0
  145. jarvis/jarvis_code_agent/rules/documentation.md +165 -0
  146. jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
  147. jarvis/jarvis_code_agent/rules/performance.md +158 -0
  148. jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
  149. jarvis/jarvis_code_agent/rules/security.md +160 -0
  150. jarvis/jarvis_code_agent/rules/tdd.md +78 -0
  151. jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
  152. jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
  153. jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
  154. jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
  155. jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
  156. jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
  157. jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
  158. jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
  159. jarvis/jarvis_code_agent/utils.py +36 -26
  160. jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
  161. jarvis/jarvis_code_analysis/code_review.py +64 -33
  162. jarvis/jarvis_data/config_schema.json +285 -192
  163. jarvis/jarvis_git_squash/main.py +8 -6
  164. jarvis/jarvis_git_utils/git_commiter.py +53 -76
  165. jarvis/jarvis_mcp/__init__.py +5 -2
  166. jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
  167. jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
  168. jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
  169. jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
  170. jarvis/jarvis_methodology/main.py +48 -39
  171. jarvis/jarvis_multi_agent/__init__.py +56 -23
  172. jarvis/jarvis_multi_agent/main.py +15 -18
  173. jarvis/jarvis_platform/base.py +179 -111
  174. jarvis/jarvis_platform/human.py +27 -16
  175. jarvis/jarvis_platform/kimi.py +52 -45
  176. jarvis/jarvis_platform/openai.py +101 -40
  177. jarvis/jarvis_platform/registry.py +51 -33
  178. jarvis/jarvis_platform/tongyi.py +68 -38
  179. jarvis/jarvis_platform/yuanbao.py +59 -43
  180. jarvis/jarvis_platform_manager/main.py +68 -76
  181. jarvis/jarvis_platform_manager/service.py +24 -14
  182. jarvis/jarvis_rag/README_CONFIG.md +314 -0
  183. jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
  184. jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
  185. jarvis/jarvis_rag/__init__.py +57 -4
  186. jarvis/jarvis_rag/cache.py +3 -1
  187. jarvis/jarvis_rag/cli.py +48 -68
  188. jarvis/jarvis_rag/embedding_interface.py +39 -0
  189. jarvis/jarvis_rag/embedding_manager.py +7 -230
  190. jarvis/jarvis_rag/embeddings/__init__.py +41 -0
  191. jarvis/jarvis_rag/embeddings/base.py +114 -0
  192. jarvis/jarvis_rag/embeddings/cohere.py +66 -0
  193. jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
  194. jarvis/jarvis_rag/embeddings/local.py +260 -0
  195. jarvis/jarvis_rag/embeddings/openai.py +62 -0
  196. jarvis/jarvis_rag/embeddings/registry.py +293 -0
  197. jarvis/jarvis_rag/llm_interface.py +8 -6
  198. jarvis/jarvis_rag/query_rewriter.py +8 -9
  199. jarvis/jarvis_rag/rag_pipeline.py +61 -52
  200. jarvis/jarvis_rag/reranker.py +7 -75
  201. jarvis/jarvis_rag/reranker_interface.py +32 -0
  202. jarvis/jarvis_rag/rerankers/__init__.py +41 -0
  203. jarvis/jarvis_rag/rerankers/base.py +109 -0
  204. jarvis/jarvis_rag/rerankers/cohere.py +67 -0
  205. jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
  206. jarvis/jarvis_rag/rerankers/jina.py +79 -0
  207. jarvis/jarvis_rag/rerankers/local.py +89 -0
  208. jarvis/jarvis_rag/rerankers/registry.py +293 -0
  209. jarvis/jarvis_rag/retriever.py +58 -43
  210. jarvis/jarvis_sec/__init__.py +66 -141
  211. jarvis/jarvis_sec/agents.py +21 -17
  212. jarvis/jarvis_sec/analysis.py +80 -33
  213. jarvis/jarvis_sec/checkers/__init__.py +7 -13
  214. jarvis/jarvis_sec/checkers/c_checker.py +356 -164
  215. jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
  216. jarvis/jarvis_sec/cli.py +43 -21
  217. jarvis/jarvis_sec/clustering.py +430 -272
  218. jarvis/jarvis_sec/file_manager.py +99 -55
  219. jarvis/jarvis_sec/parsers.py +9 -6
  220. jarvis/jarvis_sec/prompts.py +4 -3
  221. jarvis/jarvis_sec/report.py +44 -22
  222. jarvis/jarvis_sec/review.py +180 -107
  223. jarvis/jarvis_sec/status.py +50 -41
  224. jarvis/jarvis_sec/types.py +3 -0
  225. jarvis/jarvis_sec/utils.py +160 -83
  226. jarvis/jarvis_sec/verification.py +411 -181
  227. jarvis/jarvis_sec/workflow.py +132 -21
  228. jarvis/jarvis_smart_shell/main.py +28 -41
  229. jarvis/jarvis_stats/cli.py +14 -12
  230. jarvis/jarvis_stats/stats.py +28 -19
  231. jarvis/jarvis_stats/storage.py +14 -8
  232. jarvis/jarvis_stats/visualizer.py +12 -7
  233. jarvis/jarvis_tools/base.py +5 -2
  234. jarvis/jarvis_tools/clear_memory.py +13 -9
  235. jarvis/jarvis_tools/cli/main.py +23 -18
  236. jarvis/jarvis_tools/edit_file.py +572 -873
  237. jarvis/jarvis_tools/execute_script.py +10 -7
  238. jarvis/jarvis_tools/file_analyzer.py +7 -8
  239. jarvis/jarvis_tools/meta_agent.py +287 -0
  240. jarvis/jarvis_tools/methodology.py +5 -3
  241. jarvis/jarvis_tools/read_code.py +305 -1438
  242. jarvis/jarvis_tools/read_symbols.py +50 -17
  243. jarvis/jarvis_tools/read_webpage.py +19 -18
  244. jarvis/jarvis_tools/registry.py +435 -156
  245. jarvis/jarvis_tools/retrieve_memory.py +16 -11
  246. jarvis/jarvis_tools/save_memory.py +8 -6
  247. jarvis/jarvis_tools/search_web.py +31 -31
  248. jarvis/jarvis_tools/sub_agent.py +32 -28
  249. jarvis/jarvis_tools/sub_code_agent.py +44 -60
  250. jarvis/jarvis_tools/task_list_manager.py +1811 -0
  251. jarvis/jarvis_tools/virtual_tty.py +29 -19
  252. jarvis/jarvis_utils/__init__.py +4 -0
  253. jarvis/jarvis_utils/builtin_replace_map.py +2 -1
  254. jarvis/jarvis_utils/clipboard.py +9 -8
  255. jarvis/jarvis_utils/collections.py +331 -0
  256. jarvis/jarvis_utils/config.py +699 -194
  257. jarvis/jarvis_utils/dialogue_recorder.py +294 -0
  258. jarvis/jarvis_utils/embedding.py +6 -3
  259. jarvis/jarvis_utils/file_processors.py +7 -1
  260. jarvis/jarvis_utils/fzf.py +9 -3
  261. jarvis/jarvis_utils/git_utils.py +71 -42
  262. jarvis/jarvis_utils/globals.py +116 -32
  263. jarvis/jarvis_utils/http.py +6 -2
  264. jarvis/jarvis_utils/input.py +318 -83
  265. jarvis/jarvis_utils/jsonnet_compat.py +119 -104
  266. jarvis/jarvis_utils/methodology.py +37 -28
  267. jarvis/jarvis_utils/output.py +201 -44
  268. jarvis/jarvis_utils/utils.py +986 -628
  269. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
  270. jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
  271. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
  272. jarvis/jarvis_tools/generate_new_tool.py +0 -205
  273. jarvis/jarvis_tools/lsp_client.py +0 -1552
  274. jarvis/jarvis_tools/rewrite_file.py +0 -105
  275. jarvis_ai_assistant-0.7.8.dist-info/RECORD +0 -218
  276. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
  277. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
  278. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
  279. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0
@@ -19,11 +19,14 @@ from __future__ import annotations
19
19
 
20
20
  import re
21
21
  from pathlib import Path
22
- from typing import Iterable, List, Optional, Sequence, Tuple
22
+ from typing import Iterable
23
+ from typing import List
24
+ from typing import Optional
25
+ from typing import Sequence
26
+ from typing import Tuple
23
27
 
24
28
  from jarvis.jarvis_sec.types import Issue
25
29
 
26
-
27
30
  # ---------------------------
28
31
  # 规则库(正则表达式)
29
32
  # ---------------------------
@@ -46,20 +49,30 @@ RE_IO_API = re.compile(
46
49
  )
47
50
 
48
51
  # 新增:格式化字符串/危险临时文件/命令执行等风险 API 模式
49
- RE_PRINTF_LIKE = re.compile(r"\b(printf|sprintf|snprintf|vsprintf|vsnprintf)\s*\(", re.IGNORECASE)
52
+ RE_PRINTF_LIKE = re.compile(
53
+ r"\b(printf|sprintf|snprintf|vsprintf|vsnprintf)\s*\(", re.IGNORECASE
54
+ )
50
55
  RE_FPRINTF = re.compile(r"\bfprintf\s*\(", re.IGNORECASE)
51
56
  RE_INSECURE_TMP = re.compile(r"\b(tmpnam|tempnam|mktemp)\s*\(", re.IGNORECASE)
52
57
  RE_SYSTEM_LIKE = re.compile(r"\b(system|popen)\s*\(", re.IGNORECASE)
53
- RE_EXEC_LIKE = re.compile(r"\b(execvp|execlp|execvpe|execl|execve|execv)\s*\(", re.IGNORECASE)
58
+ RE_EXEC_LIKE = re.compile(
59
+ r"\b(execvp|execlp|execvpe|execl|execve|execv)\s*\(", re.IGNORECASE
60
+ )
54
61
  RE_SCANF_CALL = re.compile(r'\b(?:[fs]?scanf)\s*\(\s*"([^"]*)"', re.IGNORECASE)
55
62
  # 线程/锁相关
56
- RE_PTHREAD_LOCK = re.compile(r"\bpthread_mutex_lock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?", re.IGNORECASE)
57
- RE_PTHREAD_UNLOCK = re.compile(r"\bpthread_mutex_unlock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?", re.IGNORECASE)
63
+ RE_PTHREAD_LOCK = re.compile(
64
+ r"\bpthread_mutex_lock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?", re.IGNORECASE
65
+ )
66
+ RE_PTHREAD_UNLOCK = re.compile(
67
+ r"\bpthread_mutex_unlock\s*\(\s*&\s*([A-Za-z_]\w*)\s*\)\s*;?", re.IGNORECASE
68
+ )
58
69
  # 其他危险用法相关
59
70
  RE_ATOI_FAMILY = re.compile(r"\b(atoi|atol|atoll|atof)\s*\(", re.IGNORECASE)
60
71
  RE_RAND = re.compile(r"\b(rand|srand)\s*\(", re.IGNORECASE)
61
72
  RE_STRTOK = re.compile(r"\bstrtok\s*\(", re.IGNORECASE)
62
- RE_OPEN_PERMISSIVE = re.compile(r"\bopen\s*\(\s*[^,]+,\s*[^,]*O_CREAT[^,]*,\s*(0[0-7]{3,4})\s*\)", re.IGNORECASE)
73
+ RE_OPEN_PERMISSIVE = re.compile(
74
+ r"\bopen\s*\(\s*[^,]+,\s*[^,]*O_CREAT[^,]*,\s*(0[0-7]{3,4})\s*\)", re.IGNORECASE
75
+ )
63
76
  RE_FOPEN_MODE = re.compile(r'\bfopen\s*\(\s*[^,]+,\s*"([^"]+)"\s*\)', re.IGNORECASE)
64
77
  RE_GENERIC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*")
65
78
  RE_FREE_CALL_ANY = re.compile(r"\bfree\s*\(\s*([^)]+?)\s*\)", re.IGNORECASE)
@@ -74,25 +87,42 @@ RE_PTHREAD_RET = re.compile(
74
87
  re.IGNORECASE,
75
88
  )
76
89
  RE_PTHREAD_COND_WAIT = re.compile(r"\bpthread_cond_(?:timed)?wait\s*\(", re.IGNORECASE)
77
- RE_PTHREAD_CREATE = re.compile(r"\bpthread_create\s*\(\s*&\s*([A-Za-z_]\w*)\s*,", re.IGNORECASE)
90
+ RE_PTHREAD_CREATE = re.compile(
91
+ r"\bpthread_create\s*\(\s*&\s*([A-Za-z_]\w*)\s*,", re.IGNORECASE
92
+ )
78
93
  RE_PTHREAD_JOIN = re.compile(r"\bpthread_join\s*\(\s*([A-Za-z_]\w*)\s*,", re.IGNORECASE)
79
- RE_PTHREAD_DETACH = re.compile(r"\bpthread_detach\s*\(\s*([A-Za-z_]\w*)\s*\)", re.IGNORECASE)
94
+ RE_PTHREAD_DETACH = re.compile(
95
+ r"\bpthread_detach\s*\(\s*([A-Za-z_]\w*)\s*\)", re.IGNORECASE
96
+ )
80
97
  # C++ 标准库锁相关
81
98
  RE_STD_MUTEX = re.compile(r"\b(?:std::)?mutex\s+([A-Za-z_]\w*)", re.IGNORECASE)
82
99
  RE_MUTEX_LOCK = re.compile(r"\b([A-Za-z_]\w*)\s*\.lock\s*\(", re.IGNORECASE)
83
100
  RE_MUTEX_UNLOCK = re.compile(r"\b([A-Za-z_]\w*)\s*\.unlock\s*\(", re.IGNORECASE)
84
101
  RE_MUTEX_TRY_LOCK = re.compile(r"\b([A-Za-z_]\w*)\s*\.try_lock\s*\(", re.IGNORECASE)
85
- RE_LOCK_GUARD = re.compile(r"\b(?:std::)?lock_guard\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE)
86
- RE_UNIQUE_LOCK = re.compile(r"\b(?:std::)?unique_lock\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE)
87
- RE_SHARED_LOCK = re.compile(r"\b(?:std::)?shared_lock\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE)
102
+ RE_LOCK_GUARD = re.compile(
103
+ r"\b(?:std::)?lock_guard\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE
104
+ )
105
+ RE_UNIQUE_LOCK = re.compile(
106
+ r"\b(?:std::)?unique_lock\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE
107
+ )
108
+ RE_SHARED_LOCK = re.compile(
109
+ r"\b(?:std::)?shared_lock\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE
110
+ )
88
111
  RE_STD_LOCK = re.compile(r"\bstd::lock\s*\(", re.IGNORECASE)
89
112
  RE_SCOPED_LOCK = re.compile(r"\b(?:std::)?scoped_lock\s*<", re.IGNORECASE)
90
113
  # 数据竞争检测相关
91
- RE_STATIC_VAR = re.compile(r"\bstatic\s+(?:const\s+|volatile\s+)?[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)", re.IGNORECASE)
92
- RE_EXTERN_VAR = re.compile(r"\bextern\s+[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)", re.IGNORECASE)
114
+ RE_STATIC_VAR = re.compile(
115
+ r"\bstatic\s+(?:const\s+|volatile\s+)?[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)",
116
+ re.IGNORECASE,
117
+ )
118
+ RE_EXTERN_VAR = re.compile(
119
+ r"\bextern\s+[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)", re.IGNORECASE
120
+ )
93
121
  RE_STD_THREAD = re.compile(r"\b(?:std::)?thread\s+([A-Za-z_]\w*)", re.IGNORECASE)
94
122
  RE_ATOMIC = re.compile(r"\b(?:std::)?atomic\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE)
95
- RE_VOLATILE = re.compile(r"\bvolatile\s+[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)", re.IGNORECASE)
123
+ RE_VOLATILE = re.compile(
124
+ r"\bvolatile\s+[A-Za-z_]\w*(?:\s+\*|\s+)+([A-Za-z_]\w*)", re.IGNORECASE
125
+ )
96
126
  RE_VAR_ACCESS = re.compile(r"\b([A-Za-z_]\w*)\s*(?:=|\[|->|\.)", re.IGNORECASE)
97
127
  RE_VAR_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=", re.IGNORECASE)
98
128
  RE_INET_LEGACY = re.compile(r"\b(inet_addr|inet_aton)\s*\(", re.IGNORECASE)
@@ -103,20 +133,13 @@ RE_GETENV = re.compile(r'\bgetenv\s*\(\s*"[^"]*"\s*\)', re.IGNORECASE)
103
133
  RE_REALLOC_ASSIGN_BACK = re.compile(
104
134
  r"\b([A-Za-z_]\w*)\s*=\s*realloc\s*\(\s*\1\s*,", re.IGNORECASE
105
135
  )
106
- RE_MALLOC_ASSIGN = re.compile(
107
- r"\b([A-Za-z_]\w*)\s*=\s*malloc\s*\(", re.IGNORECASE
108
- )
109
- RE_CALLOC_ASSIGN = re.compile(
110
- r"\b([A-Za-z_]\w*)\s*=\s*calloc\s*\(", re.IGNORECASE
111
- )
112
- RE_NEW_ASSIGN = re.compile(
113
- r"\b([A-Za-z_]\w*)\s*=\s*new\b", re.IGNORECASE
114
- )
115
- RE_DEREF = re.compile(
116
- r"(\*|->)\s*[A-Za-z_]\w*|\b[A-Za-z_]\w*\s*\[", re.IGNORECASE
117
- )
136
+ RE_MALLOC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*malloc\s*\(", re.IGNORECASE)
137
+ RE_CALLOC_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*calloc\s*\(", re.IGNORECASE)
138
+ RE_NEW_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*new\b", re.IGNORECASE)
139
+ RE_DEREF = re.compile(r"(\*|->)\s*[A-Za-z_]\w*|\b[A-Za-z_]\w*\s*\[", re.IGNORECASE)
118
140
  RE_NULL_CHECK = re.compile(
119
- r"\bif\s*\(\s*(!\s*)?[A-Za-z_]\w*\s*(==|!=)\s*NULL\s*\)|\bif\s*\(\s*[A-Za-z_]\w*\s*\)", re.IGNORECASE
141
+ r"\bif\s*\(\s*(!\s*)?[A-Za-z_]\w*\s*(==|!=)\s*NULL\s*\)|\bif\s*\(\s*[A-Za-z_]\w*\s*\)",
142
+ re.IGNORECASE,
120
143
  )
121
144
  RE_FREE_VAR = re.compile(r"free\s*\(\s*([A-Za-z_]\w*)\s*\)\s*;", re.IGNORECASE)
122
145
  RE_USE_VAR = re.compile(r"\b([A-Za-z_]\w*)\b")
@@ -129,7 +152,10 @@ RE_STRNCAT = re.compile(r"\bstrncat\s*\(", re.IGNORECASE)
129
152
  RE_SHARED_PTR = re.compile(r"\b(?:std::)?shared_ptr\s*<", re.IGNORECASE)
130
153
  RE_UNIQUE_PTR = re.compile(r"\b(?:std::)?unique_ptr\s*<", re.IGNORECASE)
131
154
  RE_WEAK_PTR = re.compile(r"\b(?:std::)?weak_ptr\s*<", re.IGNORECASE)
132
- RE_SMART_PTR_ASSIGN = re.compile(r"\b([A-Za-z_]\w*)\s*=\s*(?:std::)?(?:shared_ptr|unique_ptr|weak_ptr)\s*<", re.IGNORECASE)
155
+ RE_SMART_PTR_ASSIGN = re.compile(
156
+ r"\b([A-Za-z_]\w*)\s*=\s*(?:std::)?(?:shared_ptr|unique_ptr|weak_ptr)\s*<",
157
+ re.IGNORECASE,
158
+ )
133
159
  RE_NEW_ARRAY = re.compile(r"\bnew\s+[A-Za-z_]\w*\s*\[", re.IGNORECASE)
134
160
  RE_DELETE_ARRAY = re.compile(r"\bdelete\s*\[\s*\]", re.IGNORECASE)
135
161
  RE_DELETE = re.compile(r"\bdelete\s+(?!\[)", re.IGNORECASE)
@@ -137,10 +163,18 @@ RE_STATIC_CAST = re.compile(r"\bstatic_cast\s*<", re.IGNORECASE)
137
163
  RE_DYNAMIC_CAST = re.compile(r"\bdynamic_cast\s*<", re.IGNORECASE)
138
164
  RE_REINTERPRET_CAST = re.compile(r"\breinterpret_cast\s*<", re.IGNORECASE)
139
165
  RE_CONST_CAST = re.compile(r"\bconst_cast\s*<", re.IGNORECASE)
140
- RE_VECTOR_ACCESS = re.compile(r"\b(?:std::)?vector\s*<[^>]+>\s*[A-Za-z_]\w*\s*\[", re.IGNORECASE)
141
- RE_STRING_ACCESS = re.compile(r"\b(?:std::)?(?:string|wstring)\s*[A-Za-z_]\w*\s*\[", re.IGNORECASE)
142
- RE_VECTOR_VAR = re.compile(r"\b(?:std::)?vector\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE)
143
- RE_STRING_VAR = re.compile(r"\b(?:std::)?(?:string|wstring)\s+([A-Za-z_]\w*)", re.IGNORECASE)
166
+ RE_VECTOR_ACCESS = re.compile(
167
+ r"\b(?:std::)?vector\s*<[^>]+>\s*[A-Za-z_]\w*\s*\[", re.IGNORECASE
168
+ )
169
+ RE_STRING_ACCESS = re.compile(
170
+ r"\b(?:std::)?(?:string|wstring)\s*[A-Za-z_]\w*\s*\[", re.IGNORECASE
171
+ )
172
+ RE_VECTOR_VAR = re.compile(
173
+ r"\b(?:std::)?vector\s*<[^>]+>\s*([A-Za-z_]\w*)", re.IGNORECASE
174
+ )
175
+ RE_STRING_VAR = re.compile(
176
+ r"\b(?:std::)?(?:string|wstring)\s+([A-Za-z_]\w*)", re.IGNORECASE
177
+ )
144
178
  RE_AT_METHOD = re.compile(r"\.at\s*\(", re.IGNORECASE)
145
179
  RE_VIRTUAL_DTOR = re.compile(r"\bvirtual\s+~[A-Za-z_]\w*\s*\(", re.IGNORECASE)
146
180
  RE_CLASS_DECL = re.compile(r"\bclass\s+([A-Za-z_]\w*)", re.IGNORECASE)
@@ -157,6 +191,7 @@ RE_NOEXCEPT = re.compile(r"\bnoexcept\s*(?:\([^)]*\))?", re.IGNORECASE)
157
191
  # 公共工具
158
192
  # ---------------------------
159
193
 
194
+
160
195
  def _safe_line(lines: Sequence[str], idx: int) -> str:
161
196
  if 1 <= idx <= len(lines):
162
197
  return lines[idx - 1]
@@ -168,7 +203,9 @@ def _strip_line(s: str, max_len: int = 200) -> str:
168
203
  return s if len(s) <= max_len else s[: max_len - 3] + "..."
169
204
 
170
205
 
171
- def _window(lines: Sequence[str], center: int, before: int = 3, after: int = 3) -> List[Tuple[int, str]]:
206
+ def _window(
207
+ lines: Sequence[str], center: int, before: int = 3, after: int = 3
208
+ ) -> List[Tuple[int, str]]:
172
209
  start = max(1, center - before)
173
210
  end = min(len(lines), center + after)
174
211
  return [(i, _safe_line(lines, i)) for i in range(start, end + 1)]
@@ -185,8 +222,8 @@ def _remove_comments_preserve_strings(text: str) -> str:
185
222
  n = len(text)
186
223
  in_sl_comment = False # //
187
224
  in_bl_comment = False # /* */
188
- in_string = False # "
189
- in_char = False # '
225
+ in_string = False # "
226
+ in_char = False # '
190
227
  escape = False
191
228
 
192
229
  while i < n:
@@ -337,7 +374,9 @@ def _strip_if0_blocks(text: str) -> str:
337
374
  """
338
375
  lines = text.splitlines(keepends=True)
339
376
  out: list[str] = []
340
- stack: list[dict] = [] # 每帧:{"kind": "if0"|"if", "skipping": bool, "in_else": bool}
377
+ stack: list[
378
+ dict
379
+ ] = [] # 每帧:{"kind": "if0"|"if", "skipping": bool, "in_else": bool}
341
380
 
342
381
  def any_skipping() -> bool:
343
382
  return any(frame.get("skipping", False) for frame in stack)
@@ -351,7 +390,9 @@ def _strip_if0_blocks(text: str) -> str:
351
390
  if re.match(r"^\s*#\s*if\b", line):
352
391
  # 其他 #if:不求值,仅记录,继承外层 skipping
353
392
  stack.append({"kind": "if", "skipping": any_skipping(), "in_else": False})
354
- out.append(line if not any_skipping() else ("\n" if line.endswith("\n") else ""))
393
+ out.append(
394
+ line if not any_skipping() else ("\n" if line.endswith("\n") else "")
395
+ )
355
396
  continue
356
397
  if re.match(r"^\s*#\s*else\b", line):
357
398
  if stack:
@@ -360,12 +401,16 @@ def _strip_if0_blocks(text: str) -> str:
360
401
  # #if 0 的 else:翻转 skipping,使 else 分支有效
361
402
  top["skipping"] = not top["skipping"]
362
403
  top["in_else"] = True
363
- out.append(line if not any_skipping() else ("\n" if line.endswith("\n") else ""))
404
+ out.append(
405
+ line if not any_skipping() else ("\n" if line.endswith("\n") else "")
406
+ )
364
407
  continue
365
408
  if re.match(r"^\s*#\s*endif\b", line):
366
409
  if stack:
367
410
  stack.pop()
368
- out.append(line if not any_skipping() else ("\n" if line.endswith("\n") else ""))
411
+ out.append(
412
+ line if not any_skipping() else ("\n" if line.endswith("\n") else "")
413
+ )
369
414
  continue
370
415
  # 常规代码
371
416
  if any_skipping():
@@ -375,7 +420,9 @@ def _strip_if0_blocks(text: str) -> str:
375
420
  return "".join(out)
376
421
 
377
422
 
378
- def _has_null_check_around(var: str, lines: Sequence[str], line_no: int, radius: int = 5) -> bool:
423
+ def _has_null_check_around(
424
+ var: str, lines: Sequence[str], line_no: int, radius: int = 5
425
+ ) -> bool:
379
426
  """
380
427
  扩展空指针检查识别能力,减少误报:
381
428
  - if (ptr) / if (!ptr)
@@ -405,7 +452,19 @@ def _has_null_check_around(var: str, lines: Sequence[str], line_no: int, radius:
405
452
  def _has_len_bound_around(lines: Sequence[str], line_no: int, radius: int = 3) -> bool:
406
453
  for _, s in _window(lines, line_no, before=radius, after=radius):
407
454
  # 检测是否出现长度上界/检查(非常粗略)
408
- if any(k in s for k in ["sizeof(", "BUFFER_SIZE", "MAX_", "min(", "clamp(", "snprintf", "strlcpy", "strlcat"]):
455
+ if any(
456
+ k in s
457
+ for k in [
458
+ "sizeof(",
459
+ "BUFFER_SIZE",
460
+ "MAX_",
461
+ "min(",
462
+ "clamp(",
463
+ "snprintf",
464
+ "strlcpy",
465
+ "strlcat",
466
+ ]
467
+ ):
409
468
  return True
410
469
  return False
411
470
 
@@ -423,10 +482,13 @@ def _severity_from_confidence(conf: float, base: str) -> str:
423
482
  # 具体验证规则
424
483
  # ---------------------------
425
484
 
485
+
426
486
  def _rule_unsafe_api(lines: Sequence[str], relpath: str) -> List[Issue]:
427
487
  issues: List[Issue] = []
428
488
  is_header = str(relpath).lower().endswith((".h", ".hpp"))
429
- re_type_kw = re.compile(r"\b(static|inline|const|volatile|unsigned|signed|long|short|int|char|void|size_t|ssize_t)\b")
489
+ re_type_kw = re.compile(
490
+ r"\b(static|inline|const|volatile|unsigned|signed|long|short|int|char|void|size_t|ssize_t)\b"
491
+ )
430
492
  for idx, s in enumerate(lines, start=1):
431
493
  # 跳过预处理行与声明行,减少原型/宏中的误报
432
494
  t = s.lstrip()
@@ -488,7 +550,11 @@ def _rule_boundary_funcs(lines: Sequence[str], relpath: str) -> List[Issue]:
488
550
  # 通常为更安全的写法:降低误报(直接跳过告警)
489
551
  safe_sizeof = False
490
552
  if api.lower() in ("memcpy", "memmove") and args:
491
- if "sizeof" in args and not RE_SIZEOF_PTR.search(args) and not RE_STRLEN_IN_SIZE.search(args):
553
+ if (
554
+ "sizeof" in args
555
+ and not RE_SIZEOF_PTR.search(args)
556
+ and not RE_STRLEN_IN_SIZE.search(args)
557
+ ):
492
558
  safe_sizeof = True
493
559
  if safe_sizeof:
494
560
  # 跳过该条,以提高准确性(避免将安全写法误报为风险)
@@ -630,7 +696,11 @@ def _rule_uaf_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
630
696
  reassigned = True
631
697
  break
632
698
  # 检测明显的解引用使用
633
- if deref_arrow.search(sj) or deref_star.search(sj) or deref_index.search(sj):
699
+ if (
700
+ deref_arrow.search(sj)
701
+ or deref_star.search(sj)
702
+ or deref_index.search(sj)
703
+ ):
634
704
  uaf_evidence_line = j
635
705
  break
636
706
 
@@ -666,8 +736,12 @@ def _rule_unchecked_io(lines: Sequence[str], relpath: str) -> List[Issue]:
666
736
  continue
667
737
 
668
738
  # 若本行/紧随其后 2 行出现条件判断,认为已检查(直接跳过)
669
- nearby = " ".join(_safe_line(lines, i) for i in range(idx, min(idx + 2, len(lines)) + 1))
670
- if re.search(r"\b(if|while|for)\s*\(", nearby) or re.search(r"(>=|<=|==|!=|<|>)", nearby):
739
+ nearby = " ".join(
740
+ _safe_line(lines, i) for i in range(idx, min(idx + 2, len(lines)) + 1)
741
+ )
742
+ if re.search(r"\b(if|while|for)\s*\(", nearby) or re.search(
743
+ r"(>=|<=|==|!=|<|>)", nearby
744
+ ):
671
745
  continue
672
746
 
673
747
  # 若赋值给变量,则在后续窗口内寻找对该变量的检查
@@ -684,8 +758,12 @@ def _rule_unchecked_io(lines: Sequence[str], relpath: str) -> List[Issue]:
684
758
  checked_via_var = False
685
759
  if assigned_var:
686
760
  end = min(len(lines), idx + 5)
687
- var_pat_cond = re.compile(rf"\b(if|while|for)\s*\([^)]*\b{re.escape(assigned_var)}\b[^)]*\)")
688
- var_pat_cmp = re.compile(rf"\b{re.escape(assigned_var)}\b\s*(>=|<=|==|!=|<|>)")
761
+ var_pat_cond = re.compile(
762
+ rf"\b(if|while|for)\s*\([^)]*\b{re.escape(assigned_var)}\b[^)]*\)"
763
+ )
764
+ var_pat_cmp = re.compile(
765
+ rf"\b{re.escape(assigned_var)}\b\s*(>=|<=|==|!=|<|>)"
766
+ )
689
767
  for j in range(idx + 1, end + 1):
690
768
  sj = _safe_line(lines, j)
691
769
  if var_pat_cond.search(sj) or var_pat_cmp.search(sj):
@@ -721,7 +799,10 @@ def _rule_strncpy_no_nullterm(lines: Sequence[str], relpath: str) -> List[Issue]
721
799
  conf = 0.55
722
800
  # 若邻近窗口未出现手动 '\0' 终止或显式长度-1 等处理,提升风险
723
801
  window_text = " ".join(t for _, t in _window(lines, idx, before=1, after=2))
724
- if not re.search(r"\\0|'\0'|\"\\0\"|len\s*-\s*1|sizeof\s*\(\s*\w+\s*\)\s*-\s*1", window_text):
802
+ if not re.search(
803
+ r"\\0|'\0'|\"\\0\"|len\s*-\s*1|sizeof\s*\(\s*\w+\s*\)\s*-\s*1",
804
+ window_text,
805
+ ):
725
806
  conf += 0.15
726
807
  issues.append(
727
808
  Issue(
@@ -748,6 +829,7 @@ def _rule_strncpy_no_nullterm(lines: Sequence[str], relpath: str) -> List[Issue]
748
829
  # 额外规则(新增)
749
830
  # ---------------------------
750
831
 
832
+
751
833
  def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
752
834
  """
753
835
  检测格式化字符串漏洞:printf/sprintf/snprintf/vsprintf/vsnprintf 的格式参数不是字面量;
@@ -758,7 +840,15 @@ def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
758
840
  - 针对不同函数,准确定位“格式串”所在的参数位置:
759
841
  printf: 第1参;sprintf/vsprintf: 第2参;snprintf/vsnprintf: 第3参;fprintf: 第2参
760
842
  """
761
- SAFE_WRAPPERS = ("_", "gettext", "dgettext", "ngettext", "tr", "QT_TR_NOOP", "QT_TRANSLATE_NOOP")
843
+ SAFE_WRAPPERS = (
844
+ "_",
845
+ "gettext",
846
+ "dgettext",
847
+ "ngettext",
848
+ "tr",
849
+ "QT_TR_NOOP",
850
+ "QT_TRANSLATE_NOOP",
851
+ )
762
852
  issues: List[Issue] = []
763
853
 
764
854
  def _arg_is_literal(s: str, j: int) -> bool:
@@ -789,7 +879,9 @@ def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
789
879
  return s[j:k]
790
880
  return None
791
881
 
792
- def _var_assigned_literal(var: str, lines: Sequence[str], upto_idx: int, lookback: int = 5) -> bool:
882
+ def _var_assigned_literal(
883
+ var: str, lines: Sequence[str], upto_idx: int, lookback: int = 5
884
+ ) -> bool:
793
885
  start = max(1, upto_idx - lookback)
794
886
  pat_assign = re.compile(rf"\b{re.escape(var)}\s*=\s*")
795
887
  for j in range(start, upto_idx):
@@ -877,12 +969,14 @@ def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
877
969
  if j is not None:
878
970
  # 字面量/包装字面量/回看字面量赋值的变量
879
971
  if not _arg_is_literal(s, j):
880
- if (s[j].isalpha() or s[j] == "_"):
972
+ if s[j].isalpha() or s[j] == "_":
881
973
  if _arg_is_wrapper_literal(s, j):
882
974
  flagged = False
883
975
  else:
884
976
  ident = _leading_ident(s, j)
885
- if ident and _var_assigned_literal(ident, lines, idx, lookback=5):
977
+ if ident and _var_assigned_literal(
978
+ ident, lines, idx, lookback=5
979
+ ):
886
980
  flagged = False
887
981
  else:
888
982
  flagged = True
@@ -902,12 +996,14 @@ def _rule_format_string(lines: Sequence[str], relpath: str) -> List[Issue]:
902
996
  j = _nth_arg_start(s, open_idx, 2)
903
997
  if j is not None:
904
998
  if not _arg_is_literal(s, j):
905
- if (s[j].isalpha() or s[j] == "_"):
999
+ if s[j].isalpha() or s[j] == "_":
906
1000
  if _arg_is_wrapper_literal(s, j):
907
1001
  flagged = False
908
1002
  else:
909
1003
  ident = _leading_ident(s, j)
910
- if ident and _var_assigned_literal(ident, lines, idx, lookback=5):
1004
+ if ident and _var_assigned_literal(
1005
+ ident, lines, idx, lookback=5
1006
+ ):
911
1007
  flagged = False
912
1008
  else:
913
1009
  flagged = True
@@ -987,7 +1083,9 @@ def _rule_command_execution(lines: Sequence[str], relpath: str) -> List[Issue]:
987
1083
  return s[j:k]
988
1084
  return None
989
1085
 
990
- def _var_assigned_literal(var: str, lines: Sequence[str], upto_idx: int, lookback: int = 5) -> bool:
1086
+ def _var_assigned_literal(
1087
+ var: str, lines: Sequence[str], upto_idx: int, lookback: int = 5
1088
+ ) -> bool:
991
1089
  # 在前 lookback 行内查找 var = "..."
992
1090
  start = max(1, upto_idx - lookback)
993
1091
  pat_assign = re.compile(rf"\b{re.escape(var)}\s*=\s*")
@@ -1026,7 +1124,9 @@ def _rule_command_execution(lines: Sequence[str], relpath: str) -> List[Issue]:
1026
1124
  start = s.index("(", m_exec.start())
1027
1125
  if not _arg_is_literal_or_wrapper(s, start):
1028
1126
  ident = _first_arg_identifier(s, start)
1029
- if ident and _var_assigned_literal(ident, lines, idx, lookback=5):
1127
+ if ident and _var_assigned_literal(
1128
+ ident, lines, idx, lookback=5
1129
+ ):
1030
1130
  flagged = False
1031
1131
  else:
1032
1132
  flagged = True
@@ -1084,7 +1184,7 @@ def _rule_scanf_no_width(lines: Sequence[str], relpath: str) -> List[Issue]:
1084
1184
  line=idx,
1085
1185
  evidence=_strip_line(s),
1086
1186
  description="scanf/sscanf/fscanf 使用 %s 但未限制最大宽度,存在缓冲区溢出风险。",
1087
- suggestion="为 %s 指定最大宽度(如 \"%255s\"),或使用更安全的读取方式;若使用 GNU 扩展 %ms/%m[...] 请确保对返回内存进行释放。",
1187
+ suggestion='为 %s 指定最大宽度(如 "%255s"),或使用更安全的读取方式;若使用 GNU 扩展 %ms/%m[...] 请确保对返回内存进行释放。',
1088
1188
  confidence=0.75,
1089
1189
  severity="high",
1090
1190
  )
@@ -1131,6 +1231,7 @@ def _rule_alloc_size_overflow(lines: Sequence[str], relpath: str) -> List[Issue]
1131
1231
  # 空指针/野指针/死锁 等新增规则
1132
1232
  # ---------------------------
1133
1233
 
1234
+
1134
1235
  def _rule_possible_null_deref(lines: Sequence[str], relpath: str) -> List[Issue]:
1135
1236
  """
1136
1237
  启发式检测空指针解引用:
@@ -1143,7 +1244,9 @@ def _rule_possible_null_deref(lines: Sequence[str], relpath: str) -> List[Issue]
1143
1244
  issues: List[Issue] = []
1144
1245
  re_arrow = re.compile(r"\b([A-Za-z_]\w*)\s*->")
1145
1246
  re_star = re.compile(r"(?<!\w)\*\s*([A-Za-z_]\w*)\b")
1146
- type_kw = re.compile(r"\b(typedef|struct|union|enum|class|char|int|long|short|void|size_t|ssize_t|FILE)\b")
1247
+ type_kw = re.compile(
1248
+ r"\b(typedef|struct|union|enum|class|char|int|long|short|void|size_t|ssize_t|FILE)\b"
1249
+ )
1147
1250
 
1148
1251
  def _is_deref_context(line: str, star_pos: int) -> bool:
1149
1252
  k = star_pos - 1
@@ -1197,7 +1300,9 @@ def _rule_uninitialized_ptr_use(lines: Sequence[str], relpath: str) -> List[Issu
1197
1300
  # 收集候选未初始化指针声明
1198
1301
  candidates = [] # (var, decl_line)
1199
1302
  decl_ptr_line = re.compile(r"\*")
1200
- type_prefix = re.compile(r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double)\b")
1303
+ type_prefix = re.compile(
1304
+ r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double)\b"
1305
+ )
1201
1306
  for idx, s in enumerate(lines, start=1):
1202
1307
  if ";" not in s or "(" in s or "=" in s:
1203
1308
  continue
@@ -1223,7 +1328,9 @@ def _rule_uninitialized_ptr_use(lines: Sequence[str], relpath: str) -> List[Issu
1223
1328
  initialized = True
1224
1329
  break
1225
1330
  # 解引用:p-> 或 *p
1226
- if re.search(rf"\b{re.escape(v)}\s*->", sj) or re.search(rf"(?<!\w)\*\s*{re.escape(v)}\b", sj):
1331
+ if re.search(rf"\b{re.escape(v)}\s*->", sj) or re.search(
1332
+ rf"(?<!\w)\*\s*{re.escape(v)}\b", sj
1333
+ ):
1227
1334
  deref_line = j
1228
1335
  # 若命中,若附近没有 NULL 检查/初始化则认为风险较高
1229
1336
  break
@@ -1352,7 +1459,10 @@ def _rule_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issue]:
1352
1459
  # 其他危险用法规则(新增一批低误报)
1353
1460
  # ---------------------------
1354
1461
 
1355
- def _rule_double_free_and_free_non_heap(lines: Sequence[str], relpath: str) -> List[Issue]:
1462
+
1463
+ def _rule_double_free_and_free_non_heap(
1464
+ lines: Sequence[str], relpath: str
1465
+ ) -> List[Issue]:
1356
1466
  """
1357
1467
  检测:
1358
1468
  - double_free:同一指针在未重新赋值/置空情况下被重复 free
@@ -1374,7 +1484,9 @@ def _rule_double_free_and_free_non_heap(lines: Sequence[str], relpath: str) -> L
1374
1484
  arg = m.group(1).strip()
1375
1485
 
1376
1486
  # 忽略 free(NULL)/free(0)
1377
- if re.fullmatch(r"\(?\s*(NULL|0|\(void\s*\*\)\s*0)\s*\)?", arg, re.IGNORECASE):
1487
+ if re.fullmatch(
1488
+ r"\(?\s*(NULL|0|\(void\s*\*\)\s*0)\s*\)?", arg, re.IGNORECASE
1489
+ ):
1378
1490
  continue
1379
1491
 
1380
1492
  # 明显非堆:&... 或 字符串字面量
@@ -1451,11 +1563,24 @@ def _rule_rand_insecure(lines: Sequence[str], relpath: str) -> List[Issue]:
1451
1563
  检测 rand/srand 的使用。若上下文包含安全敏感关键词,提升风险。
1452
1564
  """
1453
1565
  issues: List[Issue] = []
1454
- keywords = ("token", "nonce", "secret", "password", "passwd", "key", "auth", "salt", "session", "otp")
1566
+ keywords = (
1567
+ "token",
1568
+ "nonce",
1569
+ "secret",
1570
+ "password",
1571
+ "passwd",
1572
+ "key",
1573
+ "auth",
1574
+ "salt",
1575
+ "session",
1576
+ "otp",
1577
+ )
1455
1578
  for idx, s in enumerate(lines, start=1):
1456
1579
  if RE_RAND.search(s):
1457
1580
  conf = 0.55
1458
- window_text = " ".join(t for _, t in _window(lines, idx, before=1, after=1)).lower()
1581
+ window_text = " ".join(
1582
+ t for _, t in _window(lines, idx, before=1, after=1)
1583
+ ).lower()
1459
1584
  if any(k in window_text for k in keywords):
1460
1585
  conf += 0.2
1461
1586
  issues.append(
@@ -1506,7 +1631,18 @@ def _rule_open_permissive_perms(lines: Sequence[str], relpath: str) -> List[Issu
1506
1631
  - fopen(..., "w"/"w+") 在安全敏感上下文可提示收紧权限(基于关键词启发)
1507
1632
  """
1508
1633
  issues: List[Issue] = []
1509
- sensitive_keys = ("key", "secret", "token", "passwd", "password", "cred", "config", "cert", "private", "id_rsa")
1634
+ sensitive_keys = (
1635
+ "key",
1636
+ "secret",
1637
+ "token",
1638
+ "passwd",
1639
+ "password",
1640
+ "cred",
1641
+ "config",
1642
+ "cert",
1643
+ "private",
1644
+ "id_rsa",
1645
+ )
1510
1646
  for idx, s in enumerate(lines, start=1):
1511
1647
  m = RE_OPEN_PERMISSIVE.search(s)
1512
1648
  if m:
@@ -1530,7 +1666,9 @@ def _rule_open_permissive_perms(lines: Sequence[str], relpath: str) -> List[Issu
1530
1666
  if m2:
1531
1667
  mode = m2.group(1)
1532
1668
  if "w" in mode:
1533
- window = " ".join(t for _, t in _window(lines, idx, before=1, after=1)).lower()
1669
+ window = " ".join(
1670
+ t for _, t in _window(lines, idx, before=1, after=1)
1671
+ ).lower()
1534
1672
  if any(k in window for k in sensitive_keys):
1535
1673
  issues.append(
1536
1674
  Issue(
@@ -1553,6 +1691,7 @@ def _rule_open_permissive_perms(lines: Sequence[str], relpath: str) -> List[Issu
1553
1691
  # 更多危险用法规则(第二批)
1554
1692
  # ---------------------------
1555
1693
 
1694
+
1556
1695
  def _rule_alloca_unbounded(lines: Sequence[str], relpath: str) -> List[Issue]:
1557
1696
  """
1558
1697
  检测 alloca 使用非常量/未受控大小,可能导致栈耗尽或崩溃。
@@ -1596,7 +1735,9 @@ def _rule_vla_usage(lines: Sequence[str], relpath: str) -> List[Issue]:
1596
1735
  仅在长度非纯数字常量时提示。
1597
1736
  """
1598
1737
  issues: List[Issue] = []
1599
- type_prefix = re.compile(r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double|size_t|ssize_t)\b")
1738
+ type_prefix = re.compile(
1739
+ r"\b(typedef|struct|union|enum|class|const|volatile|static|register|signed|unsigned|char|int|long|short|void|float|double|size_t|ssize_t)\b"
1740
+ )
1600
1741
  for idx, s in enumerate(lines, start=1):
1601
1742
  if ";" not in s or "=" in s:
1602
1743
  continue
@@ -1636,7 +1777,9 @@ def _rule_pthread_returns_unchecked(lines: Sequence[str], relpath: str) -> List[
1636
1777
  for idx, s in enumerate(lines, start=1):
1637
1778
  if not RE_PTHREAD_RET.search(s):
1638
1779
  continue
1639
- nearby = " ".join(_safe_line(lines, i) for i in range(idx, min(idx + 2, len(lines)) + 1))
1780
+ nearby = " ".join(
1781
+ _safe_line(lines, i) for i in range(idx, min(idx + 2, len(lines)) + 1)
1782
+ )
1640
1783
  if not re.search(r"\bif\s*\(|>=|<=|==|!=|<|>", nearby):
1641
1784
  issues.append(
1642
1785
  Issue(
@@ -1810,6 +1953,7 @@ def _rule_getenv_unchecked(lines: Sequence[str], relpath: str) -> List[Issue]:
1810
1953
  # C++ 特定检查规则
1811
1954
  # ---------------------------
1812
1955
 
1956
+
1813
1957
  def _rule_new_delete_mismatch(lines: Sequence[str], relpath: str) -> List[Issue]:
1814
1958
  """
1815
1959
  检测 new[]/delete[] 和 new/delete 的匹配问题:
@@ -1819,7 +1963,7 @@ def _rule_new_delete_mismatch(lines: Sequence[str], relpath: str) -> List[Issue]
1819
1963
  issues: List[Issue] = []
1820
1964
  new_array_vars: dict[str, int] = {} # var -> line_no
1821
1965
  new_vars: dict[str, int] = {} # var -> line_no
1822
-
1966
+
1823
1967
  # 收集 new[] 和 new 的分配
1824
1968
  for idx, s in enumerate(lines, start=1):
1825
1969
  # new[] 分配
@@ -1830,13 +1974,13 @@ def _rule_new_delete_mismatch(lines: Sequence[str], relpath: str) -> List[Issue]
1830
1974
  if assign_match:
1831
1975
  var = assign_match.group(1)
1832
1976
  new_array_vars[var] = idx
1833
-
1977
+
1834
1978
  # new 分配(非数组)
1835
1979
  m_new = re.search(r"\b([A-Za-z_]\w*)\s*=\s*new\s+(?!.*\[)", s, re.IGNORECASE)
1836
1980
  if m_new:
1837
1981
  var = m_new.group(1)
1838
1982
  new_vars[var] = idx
1839
-
1983
+
1840
1984
  # 检查 delete[] 和 delete 的使用
1841
1985
  for idx, s in enumerate(lines, start=1):
1842
1986
  # delete[] 使用
@@ -1861,7 +2005,7 @@ def _rule_new_delete_mismatch(lines: Sequence[str], relpath: str) -> List[Issue]
1861
2005
  severity="high",
1862
2006
  )
1863
2007
  )
1864
-
2008
+
1865
2009
  # delete 使用(非数组)
1866
2010
  if RE_DELETE.search(s):
1867
2011
  m = re.search(r"delete\s+([A-Za-z_]\w*)", s, re.IGNORECASE)
@@ -1883,7 +2027,7 @@ def _rule_new_delete_mismatch(lines: Sequence[str], relpath: str) -> List[Issue]
1883
2027
  severity="high",
1884
2028
  )
1885
2029
  )
1886
-
2030
+
1887
2031
  return issues
1888
2032
 
1889
2033
 
@@ -1951,7 +2095,7 @@ def _rule_vector_string_bounds_check(lines: Sequence[str], relpath: str) -> List
1951
2095
  issues: List[Issue] = []
1952
2096
  vector_vars: set[str] = set()
1953
2097
  string_vars: set[str] = set()
1954
-
2098
+
1955
2099
  # 先收集 vector 和 string 变量
1956
2100
  for idx, s in enumerate(lines, start=1):
1957
2101
  m = RE_VECTOR_VAR.search(s)
@@ -1960,7 +2104,7 @@ def _rule_vector_string_bounds_check(lines: Sequence[str], relpath: str) -> List
1960
2104
  m = RE_STRING_VAR.search(s)
1961
2105
  if m:
1962
2106
  string_vars.add(m.group(1))
1963
-
2107
+
1964
2108
  for idx, s in enumerate(lines, start=1):
1965
2109
  # vector 访问:检测 var[...] 模式
1966
2110
  for var in vector_vars:
@@ -1968,8 +2112,14 @@ def _rule_vector_string_bounds_check(lines: Sequence[str], relpath: str) -> List
1968
2112
  # 检查是否使用了 .at()(安全访问)
1969
2113
  if not RE_AT_METHOD.search(s):
1970
2114
  # 检查附近是否有边界检查
1971
- window_text = " ".join(t for _, t in _window(lines, idx, before=2, after=2))
1972
- if not re.search(rf"\b{re.escape(var)}\s*\.(size|length|empty|at)\s*\(", window_text, re.IGNORECASE):
2115
+ window_text = " ".join(
2116
+ t for _, t in _window(lines, idx, before=2, after=2)
2117
+ )
2118
+ if not re.search(
2119
+ rf"\b{re.escape(var)}\s*\.(size|length|empty|at)\s*\(",
2120
+ window_text,
2121
+ re.IGNORECASE,
2122
+ ):
1973
2123
  issues.append(
1974
2124
  Issue(
1975
2125
  language="c/cpp",
@@ -1985,13 +2135,19 @@ def _rule_vector_string_bounds_check(lines: Sequence[str], relpath: str) -> List
1985
2135
  )
1986
2136
  )
1987
2137
  break # 每行只报告一次
1988
-
2138
+
1989
2139
  # string 访问:检测 var[...] 模式
1990
2140
  for var in string_vars:
1991
2141
  if re.search(rf"\b{re.escape(var)}\s*\[", s):
1992
2142
  if not RE_AT_METHOD.search(s):
1993
- window_text = " ".join(t for _, t in _window(lines, idx, before=2, after=2))
1994
- if not re.search(rf"\b{re.escape(var)}\s*\.(size|length|empty|at)\s*\(", window_text, re.IGNORECASE):
2143
+ window_text = " ".join(
2144
+ t for _, t in _window(lines, idx, before=2, after=2)
2145
+ )
2146
+ if not re.search(
2147
+ rf"\b{re.escape(var)}\s*\.(size|length|empty|at)\s*\(",
2148
+ window_text,
2149
+ re.IGNORECASE,
2150
+ ):
1995
2151
  issues.append(
1996
2152
  Issue(
1997
2153
  language="c/cpp",
@@ -2016,37 +2172,43 @@ def _rule_missing_virtual_dtor(lines: Sequence[str], relpath: str) -> List[Issue
2016
2172
  启发式:检测 class 声明,若存在虚函数但析构函数非虚,则提示。
2017
2173
  """
2018
2174
  issues: List[Issue] = []
2019
- classes: dict[str, dict] = {} # class_name -> {"line": int, "has_virtual": bool, "has_virtual_dtor": bool}
2175
+ classes: dict[
2176
+ str, dict
2177
+ ] = {} # class_name -> {"line": int, "has_virtual": bool, "has_virtual_dtor": bool}
2020
2178
  current_class: Optional[str] = None
2021
2179
  in_class = False
2022
2180
  brace_depth = 0
2023
-
2181
+
2024
2182
  for idx, s in enumerate(lines, start=1):
2025
2183
  # 检测 class 声明
2026
2184
  m_class = RE_CLASS_DECL.search(s)
2027
2185
  if m_class:
2028
2186
  class_name = m_class.group(1)
2029
- classes[class_name] = {"line": idx, "has_virtual": False, "has_virtual_dtor": False}
2187
+ classes[class_name] = {
2188
+ "line": idx,
2189
+ "has_virtual": False,
2190
+ "has_virtual_dtor": False,
2191
+ }
2030
2192
  current_class = class_name
2031
2193
  in_class = True
2032
2194
  brace_depth = s.count("{") - s.count("}")
2033
2195
  continue
2034
-
2196
+
2035
2197
  if in_class and current_class:
2036
2198
  brace_depth += s.count("{") - s.count("}")
2037
2199
  if brace_depth <= 0:
2038
2200
  in_class = False
2039
2201
  current_class = None
2040
2202
  continue
2041
-
2203
+
2042
2204
  # 检测虚函数
2043
2205
  if re.search(r"\bvirtual\s+[^~]", s, re.IGNORECASE):
2044
2206
  classes[current_class]["has_virtual"] = True
2045
-
2207
+
2046
2208
  # 检测虚析构函数
2047
2209
  if RE_VIRTUAL_DTOR.search(s):
2048
2210
  classes[current_class]["has_virtual_dtor"] = True
2049
-
2211
+
2050
2212
  # 检查有虚函数但无虚析构函数的类
2051
2213
  for class_name, info in classes.items():
2052
2214
  if info["has_virtual"] and not info["has_virtual_dtor"]:
@@ -2064,7 +2226,7 @@ def _rule_missing_virtual_dtor(lines: Sequence[str], relpath: str) -> List[Issue
2064
2226
  severity="high",
2065
2227
  )
2066
2228
  )
2067
-
2229
+
2068
2230
  return issues
2069
2231
 
2070
2232
 
@@ -2074,20 +2236,22 @@ def _rule_move_after_use(lines: Sequence[str], relpath: str) -> List[Issue]:
2074
2236
  """
2075
2237
  issues: List[Issue] = []
2076
2238
  moved_vars: dict[str, int] = {} # var -> line_no
2077
-
2239
+
2078
2240
  for idx, s in enumerate(lines, start=1):
2079
2241
  # 检测 std::move 赋值
2080
2242
  m = RE_MOVE_ASSIGN.search(s)
2081
2243
  if m:
2082
2244
  var = m.group(1)
2083
2245
  moved_vars[var] = idx
2084
-
2246
+
2085
2247
  # 检测移动后的使用
2086
2248
  vars_to_remove: set[str] = set() # 收集要删除的键,避免在遍历时修改字典
2087
2249
  for var, move_line in moved_vars.items():
2088
2250
  if idx > move_line and idx <= move_line + 10: # 在移动后 10 行内
2089
2251
  # 检测变量使用(排除重新赋值)
2090
- if re.search(rf"\b{re.escape(var)}\b", s) and not re.search(rf"\b{re.escape(var)}\s*=", s):
2252
+ if re.search(rf"\b{re.escape(var)}\b", s) and not re.search(
2253
+ rf"\b{re.escape(var)}\s*=", s
2254
+ ):
2091
2255
  # 检查是否是重新赋值(重置移动状态)
2092
2256
  if re.search(rf"\b{re.escape(var)}\s*=\s*(?!std::move)", s):
2093
2257
  # 重新赋值,移除记录
@@ -2111,11 +2275,11 @@ def _rule_move_after_use(lines: Sequence[str], relpath: str) -> List[Issue]:
2111
2275
  )
2112
2276
  # 移除记录,避免重复报告
2113
2277
  vars_to_remove.add(var)
2114
-
2278
+
2115
2279
  # 遍历结束后再删除
2116
2280
  for var in vars_to_remove:
2117
2281
  moved_vars.pop(var, None)
2118
-
2282
+
2119
2283
  return issues
2120
2284
 
2121
2285
 
@@ -2127,17 +2291,21 @@ def _rule_uncaught_exception(lines: Sequence[str], relpath: str) -> List[Issue]:
2127
2291
  for idx, s in enumerate(lines, start=1):
2128
2292
  if RE_THROW.search(s):
2129
2293
  # 检查附近是否有 try-catch
2130
- window_text = " ".join(t for _, t in _window(lines, idx, before=10, after=10))
2294
+ window_text = " ".join(
2295
+ t for _, t in _window(lines, idx, before=10, after=10)
2296
+ )
2131
2297
  has_try = RE_TRY.search(window_text) is not None
2132
2298
  has_catch = RE_CATCH.search(window_text) is not None
2133
-
2299
+
2134
2300
  if not (has_try and has_catch):
2135
2301
  conf = 0.6
2136
2302
  # 如果在 noexcept 函数中抛出异常,风险更高
2137
- prev_text = " ".join(t for _, t in _window(lines, idx, before=5, after=0))
2303
+ prev_text = " ".join(
2304
+ t for _, t in _window(lines, idx, before=5, after=0)
2305
+ )
2138
2306
  if RE_NOEXCEPT.search(prev_text):
2139
2307
  conf += 0.2
2140
-
2308
+
2141
2309
  issues.append(
2142
2310
  Issue(
2143
2311
  language="c/cpp",
@@ -2162,7 +2330,7 @@ def _rule_smart_ptr_cycle(lines: Sequence[str], relpath: str) -> List[Issue]:
2162
2330
  """
2163
2331
  issues: List[Issue] = []
2164
2332
  shared_ptr_vars: set[str] = set()
2165
-
2333
+
2166
2334
  for idx, s in enumerate(lines, start=1):
2167
2335
  # 收集 shared_ptr 变量
2168
2336
  if RE_SHARED_PTR.search(s):
@@ -2170,23 +2338,26 @@ def _rule_smart_ptr_cycle(lines: Sequence[str], relpath: str) -> List[Issue]:
2170
2338
  if m:
2171
2339
  var = m.group(1)
2172
2340
  shared_ptr_vars.add(var)
2173
-
2341
+
2174
2342
  # 检测 shared_ptr 之间的相互引用(简单启发式)
2175
2343
  if RE_SHARED_PTR.search(s) and shared_ptr_vars:
2176
2344
  # 检查是否在 shared_ptr 初始化中使用了另一个 shared_ptr
2177
2345
  for var in shared_ptr_vars:
2178
- if re.search(rf"\b{re.escape(var)}\b", s) and "make_shared" in s.lower():
2346
+ if (
2347
+ re.search(rf"\b{re.escape(var)}\b", s)
2348
+ and "make_shared" in s.lower()
2349
+ ):
2179
2350
  # 简单启发:如果两个 shared_ptr 相互引用,可能存在循环
2180
2351
  # 这里仅做提示,实际需要更复杂的分析
2181
2352
  pass
2182
-
2353
+
2183
2354
  # 检测 weak_ptr 的使用(通常用于打破循环引用)
2184
2355
  has_weak_ptr = False
2185
2356
  for idx, s in enumerate(lines, start=1):
2186
2357
  if RE_WEAK_PTR.search(s):
2187
2358
  has_weak_ptr = True
2188
2359
  break
2189
-
2360
+
2190
2361
  # 如果大量使用 shared_ptr 但未见 weak_ptr,提示可能的循环引用风险
2191
2362
  if len(shared_ptr_vars) > 3 and not has_weak_ptr:
2192
2363
  # 在第一个 shared_ptr 使用处提示
@@ -2207,7 +2378,7 @@ def _rule_smart_ptr_cycle(lines: Sequence[str], relpath: str) -> List[Issue]:
2207
2378
  )
2208
2379
  )
2209
2380
  break
2210
-
2381
+
2211
2382
  return issues
2212
2383
 
2213
2384
 
@@ -2224,13 +2395,13 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2224
2395
  lock_stack: list[str] = [] # 当前持有的锁栈
2225
2396
  order_pairs: dict[tuple[str, str], int] = {} # 加锁顺序对 -> 行号
2226
2397
  mutex_vars: set[str] = set() # 所有 mutex 变量名
2227
-
2398
+
2228
2399
  # 先收集所有 mutex 变量
2229
2400
  for idx, s in enumerate(lines, start=1):
2230
2401
  m = RE_STD_MUTEX.search(s)
2231
2402
  if m:
2232
2403
  mutex_vars.add(m.group(1))
2233
-
2404
+
2234
2405
  # 扫描加锁/解锁操作
2235
2406
  for idx, s in enumerate(lines, start=1):
2236
2407
  # 检测 lock() 调用
@@ -2259,7 +2430,7 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2259
2430
  pair = (lock_stack[-1], mtx)
2260
2431
  order_pairs.setdefault(pair, idx)
2261
2432
  lock_stack.append(mtx)
2262
-
2433
+
2263
2434
  # 检测 unlock() 调用
2264
2435
  m_unlock = RE_MUTEX_UNLOCK.search(s)
2265
2436
  if m_unlock:
@@ -2270,26 +2441,30 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2270
2441
  if lock_stack[k] == mtx:
2271
2442
  del lock_stack[k]
2272
2443
  break
2273
-
2444
+
2274
2445
  # 检测 lock_guard/unique_lock(RAII,自动解锁,通常更安全)
2275
2446
  RE_LOCK_GUARD.search(s) or RE_UNIQUE_LOCK.search(s) or RE_SHARED_LOCK.search(s)
2276
-
2447
+
2277
2448
  # 检测 std::lock 或 scoped_lock(死锁避免机制)
2278
2449
  has_safe_lock = RE_STD_LOCK.search(s) or RE_SCOPED_LOCK.search(s)
2279
-
2450
+
2280
2451
  # 粗略按作用域结束重置
2281
2452
  if "}" in s and not has_safe_lock:
2282
2453
  # 如果作用域结束且栈中还有锁,可能是问题(但可能是 RAII 锁,所以降低置信度)
2283
2454
  if lock_stack:
2284
2455
  # 这里不直接报错,因为可能是 RAII 锁
2285
2456
  pass
2286
-
2457
+
2287
2458
  # 检测手动锁定多个 mutex 但未使用 std::lock
2288
2459
  if m_lock and len(lock_stack) > 1 and not has_safe_lock:
2289
2460
  # 在锁定第二个 mutex 时,如果之前已持有锁且未使用 std::lock,提示风险
2290
2461
  if idx > 1:
2291
- prev_text = " ".join(_safe_line(lines, j) for j in range(max(1, idx - 3), idx))
2292
- if not RE_STD_LOCK.search(prev_text) and not RE_SCOPED_LOCK.search(prev_text):
2462
+ prev_text = " ".join(
2463
+ _safe_line(lines, j) for j in range(max(1, idx - 3), idx)
2464
+ )
2465
+ if not RE_STD_LOCK.search(prev_text) and not RE_SCOPED_LOCK.search(
2466
+ prev_text
2467
+ ):
2293
2468
  issues.append(
2294
2469
  Issue(
2295
2470
  language="c/cpp",
@@ -2304,7 +2479,7 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2304
2479
  severity="high",
2305
2480
  )
2306
2481
  )
2307
-
2482
+
2308
2483
  # 锁顺序反转检测
2309
2484
  for (a, b), ln in order_pairs.items():
2310
2485
  if (b, a) in order_pairs:
@@ -2322,7 +2497,7 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2322
2497
  severity="high",
2323
2498
  )
2324
2499
  )
2325
-
2500
+
2326
2501
  # 可能缺失解锁:在 lock() 后的 50 行窗口内未见对应 unlock()
2327
2502
  for idx, s in enumerate(lines, start=1):
2328
2503
  m_lock = RE_MUTEX_LOCK.search(s)
@@ -2331,13 +2506,19 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2331
2506
  mtx = m_lock.group(1)
2332
2507
  if mtx not in mutex_vars:
2333
2508
  continue
2334
-
2509
+
2335
2510
  # 检查是否是 lock_guard/unique_lock(RAII,自动解锁)
2336
- window_text = " ".join(_safe_line(lines, j) for j in range(idx, min(idx + 3, len(lines)) + 1))
2337
- is_raii = RE_LOCK_GUARD.search(window_text) or RE_UNIQUE_LOCK.search(window_text) or RE_SHARED_LOCK.search(window_text)
2511
+ window_text = " ".join(
2512
+ _safe_line(lines, j) for j in range(idx, min(idx + 3, len(lines)) + 1)
2513
+ )
2514
+ is_raii = (
2515
+ RE_LOCK_GUARD.search(window_text)
2516
+ or RE_UNIQUE_LOCK.search(window_text)
2517
+ or RE_SHARED_LOCK.search(window_text)
2518
+ )
2338
2519
  if is_raii:
2339
2520
  continue # RAII 锁会自动解锁,跳过
2340
-
2521
+
2341
2522
  end = min(len(lines), idx + 50)
2342
2523
  unlocked = False
2343
2524
  for j in range(idx + 1, end + 1):
@@ -2349,11 +2530,15 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2349
2530
  # 检查作用域结束(可能是 RAII 锁)
2350
2531
  if "}" in sj:
2351
2532
  # 检查是否是 lock_guard/unique_lock 的作用域
2352
- prev_scope = " ".join(_safe_line(lines, k) for k in range(max(1, j - 5), j))
2353
- if RE_LOCK_GUARD.search(prev_scope) or RE_UNIQUE_LOCK.search(prev_scope):
2533
+ prev_scope = " ".join(
2534
+ _safe_line(lines, k) for k in range(max(1, j - 5), j)
2535
+ )
2536
+ if RE_LOCK_GUARD.search(prev_scope) or RE_UNIQUE_LOCK.search(
2537
+ prev_scope
2538
+ ):
2354
2539
  unlocked = True
2355
2540
  break
2356
-
2541
+
2357
2542
  if not unlocked:
2358
2543
  issues.append(
2359
2544
  Issue(
@@ -2369,7 +2554,7 @@ def _rule_cpp_deadlock_patterns(lines: Sequence[str], relpath: str) -> List[Issu
2369
2554
  severity="medium",
2370
2555
  )
2371
2556
  )
2372
-
2557
+
2373
2558
  return issues
2374
2559
 
2375
2560
 
@@ -2380,7 +2565,7 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2380
2565
  - 检测到线程创建但共享变量访问时未见锁保护
2381
2566
  - volatile 误用(volatile 不能保证线程安全)
2382
2567
  - 未使用原子操作保护共享变量
2383
-
2568
+
2384
2569
  实现基于启发式,需要结合上下文分析。
2385
2570
  """
2386
2571
  issues: List[Issue] = []
@@ -2388,7 +2573,7 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2388
2573
  thread_creation_lines: list[int] = [] # 线程创建行号
2389
2574
  atomic_vars: set[str] = set() # 原子变量集合
2390
2575
  volatile_vars: set[str] = set() # volatile 变量集合
2391
-
2576
+
2392
2577
  # 第一遍扫描:收集共享变量、线程创建、原子变量
2393
2578
  for idx, s in enumerate(lines, start=1):
2394
2579
  # 收集全局/静态变量
@@ -2398,13 +2583,13 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2398
2583
  # 排除 const 变量(只读,通常安全)
2399
2584
  if "const" not in s.lower():
2400
2585
  shared_vars.add(var)
2401
-
2586
+
2402
2587
  m_extern = RE_EXTERN_VAR.search(s)
2403
2588
  if m_extern:
2404
2589
  var = m_extern.group(1)
2405
2590
  if "const" not in s.lower():
2406
2591
  shared_vars.add(var)
2407
-
2592
+
2408
2593
  # 检测全局变量声明(文件作用域)
2409
2594
  if idx == 1 or (idx > 1 and _safe_line(lines, idx - 1).strip().endswith("}")):
2410
2595
  # 可能是文件作用域的变量
@@ -2412,61 +2597,66 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2412
2597
  if m_global and "const" not in s.lower() and "static" not in s.lower():
2413
2598
  var = m_global.group(1)
2414
2599
  shared_vars.add(var)
2415
-
2600
+
2416
2601
  # 检测线程创建
2417
2602
  if RE_PTHREAD_CREATE.search(s) or RE_STD_THREAD.search(s):
2418
2603
  thread_creation_lines.append(idx)
2419
-
2604
+
2420
2605
  # 收集原子变量
2421
2606
  m_atomic = RE_ATOMIC.search(s)
2422
2607
  if m_atomic:
2423
2608
  var = m_atomic.group(1)
2424
2609
  atomic_vars.add(var)
2425
-
2610
+
2426
2611
  # 收集 volatile 变量
2427
2612
  m_volatile = RE_VOLATILE.search(s)
2428
2613
  if m_volatile:
2429
2614
  var = m_volatile.group(1)
2430
2615
  volatile_vars.add(var)
2431
-
2616
+
2432
2617
  # 如果没有线程创建,通常不存在数据竞争风险
2433
2618
  if not thread_creation_lines:
2434
2619
  return issues
2435
-
2620
+
2436
2621
  # 第二遍扫描:检测共享变量访问时的保护情况
2437
2622
  for idx, s in enumerate(lines, start=1):
2438
2623
  # 检测共享变量的访问(赋值或读取)
2439
2624
  for var in shared_vars:
2440
2625
  if var in atomic_vars:
2441
2626
  continue # 原子变量,通常安全
2442
-
2627
+
2443
2628
  # 检测变量访问
2444
2629
  var_pattern = re.compile(rf"\b{re.escape(var)}\b")
2445
2630
  if not var_pattern.search(s):
2446
2631
  continue
2447
-
2632
+
2448
2633
  # 检查是否是赋值操作
2449
- is_write = RE_VAR_ASSIGN.search(s) and var in s[:s.find("=")]
2450
-
2634
+ is_write = RE_VAR_ASSIGN.search(s) and var in s[: s.find("=")]
2635
+
2451
2636
  # 检查附近是否有锁保护
2452
2637
  window_text = " ".join(t for _, t in _window(lines, idx, before=5, after=5))
2453
2638
  has_lock = (
2454
- RE_PTHREAD_LOCK.search(window_text) is not None or
2455
- RE_MUTEX_LOCK.search(window_text) is not None or
2456
- RE_LOCK_GUARD.search(window_text) is not None or
2457
- RE_UNIQUE_LOCK.search(window_text) is not None or
2458
- RE_SHARED_LOCK.search(window_text) is not None
2639
+ RE_PTHREAD_LOCK.search(window_text) is not None
2640
+ or RE_MUTEX_LOCK.search(window_text) is not None
2641
+ or RE_LOCK_GUARD.search(window_text) is not None
2642
+ or RE_UNIQUE_LOCK.search(window_text) is not None
2643
+ or RE_SHARED_LOCK.search(window_text) is not None
2459
2644
  )
2460
-
2645
+
2461
2646
  # 检查是否在锁的作用域内(简单启发式)
2462
2647
  # 查找最近的锁
2463
2648
  lock_line = None
2464
2649
  for j in range(max(1, idx - 10), idx):
2465
2650
  sj = _safe_line(lines, j)
2466
- if RE_PTHREAD_LOCK.search(sj) or RE_MUTEX_LOCK.search(sj) or RE_LOCK_GUARD.search(sj) or RE_UNIQUE_LOCK.search(sj):
2651
+ if (
2652
+ RE_PTHREAD_LOCK.search(sj)
2653
+ or RE_MUTEX_LOCK.search(sj)
2654
+ or RE_LOCK_GUARD.search(sj)
2655
+ or RE_UNIQUE_LOCK.search(sj)
2656
+ ):
2467
2657
  lock_line = j
2468
2658
  break
2469
-
2659
+
2470
2660
  # 检查锁是否已解锁
2471
2661
  unlocked = False
2472
2662
  if lock_line:
@@ -2475,7 +2665,7 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2475
2665
  if RE_PTHREAD_UNLOCK.search(sj) or RE_MUTEX_UNLOCK.search(sj):
2476
2666
  unlocked = True
2477
2667
  break
2478
-
2668
+
2479
2669
  # 如果未检测到锁保护,且是写操作,风险更高
2480
2670
  if not has_lock or (lock_line and unlocked):
2481
2671
  conf = 0.6
@@ -2484,12 +2674,12 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2484
2674
  if var in volatile_vars:
2485
2675
  # volatile 不能保证线程安全,但可能被误用
2486
2676
  conf += 0.1
2487
-
2677
+
2488
2678
  # 检查是否在函数参数中(可能是局部变量,降低风险)
2489
2679
  if "(" in s and ")" in s:
2490
2680
  # 可能是函数调用参数,降低置信度
2491
2681
  conf -= 0.1
2492
-
2682
+
2493
2683
  issues.append(
2494
2684
  Issue(
2495
2685
  language="c/cpp",
@@ -2504,30 +2694,32 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2504
2694
  severity="high" if conf >= 0.7 else "medium",
2505
2695
  )
2506
2696
  )
2507
-
2697
+
2508
2698
  # 检测 volatile 的误用(volatile 不能保证线程安全)
2509
2699
  for idx, s in enumerate(lines, start=1):
2510
2700
  for var in volatile_vars:
2511
2701
  if var in atomic_vars:
2512
2702
  continue # 如果同时是原子变量,跳过
2513
-
2703
+
2514
2704
  if re.search(rf"\b{re.escape(var)}\b", s):
2515
2705
  # 检查是否在多线程上下文中使用 volatile
2516
- window_text = " ".join(t for _, t in _window(lines, idx, before=3, after=3))
2706
+ window_text = " ".join(
2707
+ t for _, t in _window(lines, idx, before=3, after=3)
2708
+ )
2517
2709
  has_thread = (
2518
- RE_PTHREAD_CREATE.search(window_text) is not None or
2519
- RE_STD_THREAD.search(window_text) is not None or
2520
- any(abs(j - idx) < 20 for j in thread_creation_lines)
2710
+ RE_PTHREAD_CREATE.search(window_text) is not None
2711
+ or RE_STD_THREAD.search(window_text) is not None
2712
+ or any(abs(j - idx) < 20 for j in thread_creation_lines)
2521
2713
  )
2522
-
2714
+
2523
2715
  if has_thread:
2524
2716
  # 检查是否有锁保护
2525
2717
  has_lock = (
2526
- RE_PTHREAD_LOCK.search(window_text) is not None or
2527
- RE_MUTEX_LOCK.search(window_text) is not None or
2528
- RE_LOCK_GUARD.search(window_text) is not None
2718
+ RE_PTHREAD_LOCK.search(window_text) is not None
2719
+ or RE_MUTEX_LOCK.search(window_text) is not None
2720
+ or RE_LOCK_GUARD.search(window_text) is not None
2529
2721
  )
2530
-
2722
+
2531
2723
  if not has_lock:
2532
2724
  issues.append(
2533
2725
  Issue(
@@ -2543,7 +2735,7 @@ def _rule_data_race_suspect(lines: Sequence[str], relpath: str) -> List[Issue]:
2543
2735
  severity="high",
2544
2736
  )
2545
2737
  )
2546
-
2738
+
2547
2739
  return issues
2548
2740
 
2549
2741
 
@@ -2553,7 +2745,7 @@ def _rule_smart_ptr_get_unsafe(lines: Sequence[str], relpath: str) -> List[Issue
2553
2745
  """
2554
2746
  issues: List[Issue] = []
2555
2747
  smart_ptr_vars: set[str] = set()
2556
-
2748
+
2557
2749
  # 先收集智能指针变量
2558
2750
  for idx, s in enumerate(lines, start=1):
2559
2751
  m = RE_SMART_PTR_ASSIGN.search(s)
@@ -2564,7 +2756,7 @@ def _rule_smart_ptr_get_unsafe(lines: Sequence[str], relpath: str) -> List[Issue
2564
2756
  m = re.search(r"\b([A-Za-z_]\w*)\s*(?:=|;)", s)
2565
2757
  if m:
2566
2758
  smart_ptr_vars.add(m.group(1))
2567
-
2759
+
2568
2760
  for idx, s in enumerate(lines, start=1):
2569
2761
  # 检测 .get() 调用
2570
2762
  for var in smart_ptr_vars:
@@ -2573,7 +2765,7 @@ def _rule_smart_ptr_get_unsafe(lines: Sequence[str], relpath: str) -> List[Issue
2573
2765
  # 如果 .get() 的结果被存储或传递,风险更高
2574
2766
  if "=" in s or re.search(r"\.get\s*\([^)]*\)\s*[=,\(]", s):
2575
2767
  conf += 0.1
2576
-
2768
+
2577
2769
  issues.append(
2578
2770
  Issue(
2579
2771
  language="c/cpp",
@@ -2677,4 +2869,4 @@ def analyze_files(base_path: str, files: Iterable[str]) -> List[Issue]:
2677
2869
  for f in files:
2678
2870
  rel = Path(f)
2679
2871
  out.extend(analyze_c_cpp_file(base, rel))
2680
- return out
2872
+ return out