langchain-agentx-python 0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. langchain_agentx/__init__.py +46 -0
  2. langchain_agentx/command/__init__.py +28 -0
  3. langchain_agentx/command/builtin/__init__.py +25 -0
  4. langchain_agentx/command/builtin/clear.py +33 -0
  5. langchain_agentx/command/builtin/compact.py +33 -0
  6. langchain_agentx/command/builtin/memory.py +37 -0
  7. langchain_agentx/command/builtin/reload_plugins.py +42 -0
  8. langchain_agentx/command/context.py +30 -0
  9. langchain_agentx/command/dispatcher.py +183 -0
  10. langchain_agentx/command/registry.py +110 -0
  11. langchain_agentx/command/result.py +25 -0
  12. langchain_agentx/command/types.py +41 -0
  13. langchain_agentx/config/__init__.py +14 -0
  14. langchain_agentx/loop/__init__.py +47 -0
  15. langchain_agentx/loop/config/__init__.py +20 -0
  16. langchain_agentx/loop/config/agent_config.py +66 -0
  17. langchain_agentx/loop/config/agent_loop_config.py +72 -0
  18. langchain_agentx/loop/config/model_context_resolver.py +105 -0
  19. langchain_agentx/loop/config/runtime_settings.py +50 -0
  20. langchain_agentx/loop/config/token_estimator.py +133 -0
  21. langchain_agentx/loop/context/__init__.py +66 -0
  22. langchain_agentx/loop/context/blocking_guard.py +97 -0
  23. langchain_agentx/loop/context/compaction_service.py +60 -0
  24. langchain_agentx/loop/context/message_utils.py +56 -0
  25. langchain_agentx/loop/context/pipeline.py +127 -0
  26. langchain_agentx/loop/context/settings.py +103 -0
  27. langchain_agentx/loop/context/stages/__init__.py +29 -0
  28. langchain_agentx/loop/context/stages/autocompact.py +140 -0
  29. langchain_agentx/loop/context/stages/base.py +32 -0
  30. langchain_agentx/loop/context/stages/collapse.py +76 -0
  31. langchain_agentx/loop/context/stages/microcompact.py +76 -0
  32. langchain_agentx/loop/context/stages/noop.py +33 -0
  33. langchain_agentx/loop/context/stages/snip.py +71 -0
  34. langchain_agentx/loop/context/stages/tool_result_budget.py +69 -0
  35. langchain_agentx/loop/context/types.py +79 -0
  36. langchain_agentx/loop/exit/__init__.py +1 -0
  37. langchain_agentx/loop/exit/exit_logic.py +320 -0
  38. langchain_agentx/loop/exit/reason_codes.py +39 -0
  39. langchain_agentx/loop/graph/__init__.py +5 -0
  40. langchain_agentx/loop/graph/builtin_loop_control.py +197 -0
  41. langchain_agentx/loop/graph/factory.py +1409 -0
  42. langchain_agentx/loop/graph/graph_edges.py +820 -0
  43. langchain_agentx/loop/hook/__init__.py +48 -0
  44. langchain_agentx/loop/hook/async_hook_runner.py +62 -0
  45. langchain_agentx/loop/hook/config.py +280 -0
  46. langchain_agentx/loop/hook/engine.py +321 -0
  47. langchain_agentx/loop/hook/executors/__init__.py +9 -0
  48. langchain_agentx/loop/hook/executors/agent.py +107 -0
  49. langchain_agentx/loop/hook/executors/command.py +230 -0
  50. langchain_agentx/loop/hook/executors/http.py +114 -0
  51. langchain_agentx/loop/hook/executors/prompt.py +92 -0
  52. langchain_agentx/loop/hook/graph_wiring.py +134 -0
  53. langchain_agentx/loop/hook/registry.py +262 -0
  54. langchain_agentx/loop/hook/trust.py +43 -0
  55. langchain_agentx/loop/hook/types.py +110 -0
  56. langchain_agentx/loop/injection/__init__.py +13 -0
  57. langchain_agentx/loop/injection/dedup.py +74 -0
  58. langchain_agentx/loop/loop_abort.py +36 -0
  59. langchain_agentx/loop/model/__init__.py +1 -0
  60. langchain_agentx/loop/model/model_node.py +648 -0
  61. langchain_agentx/loop/model/model_nodes.py +661 -0
  62. langchain_agentx/loop/model/orphan_tool_results.py +38 -0
  63. langchain_agentx/loop/model/retrier.py +307 -0
  64. langchain_agentx/loop/model/retry_bridge.py +58 -0
  65. langchain_agentx/loop/model/retry_events.py +35 -0
  66. langchain_agentx/loop/model/retry_policy.py +56 -0
  67. langchain_agentx/loop/model/schema_and_format.py +153 -0
  68. langchain_agentx/loop/model/tool_and_model_binding.py +227 -0
  69. langchain_agentx/loop/model/tool_call_degradation_corrector.py +443 -0
  70. langchain_agentx/loop/model/tool_transcript_guard.py +225 -0
  71. langchain_agentx/loop/prompt/__init__.py +95 -0
  72. langchain_agentx/loop/prompt/builder.py +61 -0
  73. langchain_agentx/loop/prompt/builtin.py +218 -0
  74. langchain_agentx/loop/prompt/compact.py +408 -0
  75. langchain_agentx/loop/prompt/sections.py +120 -0
  76. langchain_agentx/loop/runtime/__init__.py +19 -0
  77. langchain_agentx/loop/runtime/context.py +34 -0
  78. langchain_agentx/loop/runtime/context_factory.py +107 -0
  79. langchain_agentx/loop/runtime/subagent_execution_paths.py +68 -0
  80. langchain_agentx/loop/subagent/__init__.py +53 -0
  81. langchain_agentx/loop/subagent/async_runner.py +215 -0
  82. langchain_agentx/loop/subagent/context.py +209 -0
  83. langchain_agentx/loop/subagent/fork_worktree_notice.py +25 -0
  84. langchain_agentx/loop/subagent/graph.py +72 -0
  85. langchain_agentx/loop/subagent/orchestrator.py +391 -0
  86. langchain_agentx/loop/subagent/progress.py +30 -0
  87. langchain_agentx/loop/subagent/prompt.py +52 -0
  88. langchain_agentx/loop/subagent/runner.py +504 -0
  89. langchain_agentx/loop/subagent/transcript.py +172 -0
  90. langchain_agentx/memory/__init__.py +2 -0
  91. langchain_agentx/memory/instruction/__init__.py +12 -0
  92. langchain_agentx/memory/instruction/loader.py +325 -0
  93. langchain_agentx/memory/instruction/resolver.py +24 -0
  94. langchain_agentx/memory/instruction/runtime.py +83 -0
  95. langchain_agentx/memory/instruction/sections.py +83 -0
  96. langchain_agentx/memory/instruction/types.py +59 -0
  97. langchain_agentx/memory/memdir/__init__.py +77 -0
  98. langchain_agentx/memory/memdir/age.py +36 -0
  99. langchain_agentx/memory/memdir/agent_memory.py +380 -0
  100. langchain_agentx/memory/memdir/extractor.py +309 -0
  101. langchain_agentx/memory/memdir/loader.py +187 -0
  102. langchain_agentx/memory/memdir/paths.py +63 -0
  103. langchain_agentx/memory/memdir/recall.py +45 -0
  104. langchain_agentx/memory/memdir/runtime.py +43 -0
  105. langchain_agentx/memory/memdir/scan.py +135 -0
  106. langchain_agentx/memory/memdir/types.py +104 -0
  107. langchain_agentx/memory/session/__init__.py +76 -0
  108. langchain_agentx/memory/session/compact_bridge.py +208 -0
  109. langchain_agentx/memory/session/prompts.py +172 -0
  110. langchain_agentx/memory/session/session_memory.py +282 -0
  111. langchain_agentx/observability/__init__.py +67 -0
  112. langchain_agentx/observability/evaluation/__init__.py +17 -0
  113. langchain_agentx/observability/evaluation/checkers/__init__.py +18 -0
  114. langchain_agentx/observability/evaluation/checkers/base.py +34 -0
  115. langchain_agentx/observability/evaluation/checkers/compaction.py +38 -0
  116. langchain_agentx/observability/evaluation/checkers/degradation.py +50 -0
  117. langchain_agentx/observability/evaluation/checkers/exit_quality.py +42 -0
  118. langchain_agentx/observability/evaluation/checkers/session_memory.py +45 -0
  119. langchain_agentx/observability/evaluation/checkers/tool_behavior.py +53 -0
  120. langchain_agentx/observability/evaluation/retention_scheduler.py +67 -0
  121. langchain_agentx/observability/evaluation/service.py +102 -0
  122. langchain_agentx/observability/evaluation/state.py +32 -0
  123. langchain_agentx/observability/evaluation/store.py +258 -0
  124. langchain_agentx/observability/events/__init__.py +15 -0
  125. langchain_agentx/observability/events/langchain_agentx_event_adapter.py +832 -0
  126. langchain_agentx/observability/logging/__init__.py +15 -0
  127. langchain_agentx/observability/logging/debug_burst.py +95 -0
  128. langchain_agentx/observability/logging/logging_config.py +178 -0
  129. langchain_agentx/observability/logging/logging_contract.py +65 -0
  130. langchain_agentx/observability/replay/__init__.py +35 -0
  131. langchain_agentx/observability/replay/cli.py +91 -0
  132. langchain_agentx/observability/replay/service.py +83 -0
  133. langchain_agentx/observability/replay/store.py +278 -0
  134. langchain_agentx/observability/replay/ui.py +47 -0
  135. langchain_agentx/observability/trace/__init__.py +25 -0
  136. langchain_agentx/observability/trace/collector.py +560 -0
  137. langchain_agentx/observability/trace/event_emitter.py +183 -0
  138. langchain_agentx/observability/trace/hook_event_emitter.py +49 -0
  139. langchain_agentx/observability/trace/models.py +144 -0
  140. langchain_agentx/observability/trace/sqlite_store.py +873 -0
  141. langchain_agentx/observability/trace/trace_callback.py +295 -0
  142. langchain_agentx/observability/trace/trace_lifecycle_collector.py +114 -0
  143. langchain_agentx/plugin/__init__.py +26 -0
  144. langchain_agentx/plugin/builtin.py +53 -0
  145. langchain_agentx/plugin/config.py +113 -0
  146. langchain_agentx/plugin/loader.py +386 -0
  147. langchain_agentx/plugin/manifest.py +154 -0
  148. langchain_agentx/plugin/registries.py +211 -0
  149. langchain_agentx/plugin/types.py +142 -0
  150. langchain_agentx/provider/__init__.py +27 -0
  151. langchain_agentx/provider/anthropic.py +121 -0
  152. langchain_agentx/provider/compatible_chat_openai.py +86 -0
  153. langchain_agentx/provider/env.py +45 -0
  154. langchain_agentx/provider/model_profile.py +156 -0
  155. langchain_agentx/provider/openai.py +89 -0
  156. langchain_agentx/session/__init__.py +17 -0
  157. langchain_agentx/session/agent_session.py +320 -0
  158. langchain_agentx/session/conversation_factory.py +87 -0
  159. langchain_agentx/session/conversation_recovery.py +156 -0
  160. langchain_agentx/session/conversation_session.py +198 -0
  161. langchain_agentx/session/factory.py +143 -0
  162. langchain_agentx/session/protocol.py +25 -0
  163. langchain_agentx/task_runtime/__init__.py +113 -0
  164. langchain_agentx/task_runtime/core/__init__.py +51 -0
  165. langchain_agentx/task_runtime/core/ids.py +33 -0
  166. langchain_agentx/task_runtime/core/interfaces.py +115 -0
  167. langchain_agentx/task_runtime/core/notification_priority.py +19 -0
  168. langchain_agentx/task_runtime/core/types.py +136 -0
  169. langchain_agentx/task_runtime/integrations/__init__.py +33 -0
  170. langchain_agentx/task_runtime/integrations/loop_adapter.py +91 -0
  171. langchain_agentx/task_runtime/integrations/loop_integration.py +61 -0
  172. langchain_agentx/task_runtime/integrations/prefetch_providers.py +108 -0
  173. langchain_agentx/task_runtime/integrations/provider_factory.py +103 -0
  174. langchain_agentx/task_runtime/integrations/queued_command_provider.py +184 -0
  175. langchain_agentx/task_runtime/integrations/sqlite_queued_command_provider.py +338 -0
  176. langchain_agentx/task_runtime/integrations/tool_use_summary_provider.py +254 -0
  177. langchain_agentx/task_runtime/orchestrator/__init__.py +5 -0
  178. langchain_agentx/task_runtime/orchestrator/runtime.py +386 -0
  179. langchain_agentx/task_runtime/output/__init__.py +5 -0
  180. langchain_agentx/task_runtime/output/sink.py +64 -0
  181. langchain_agentx/task_runtime/policy/__init__.py +11 -0
  182. langchain_agentx/task_runtime/policy/withhold_visibility.py +32 -0
  183. langchain_agentx/task_runtime/queue/__init__.py +5 -0
  184. langchain_agentx/task_runtime/queue/in_memory.py +55 -0
  185. langchain_agentx/task_runtime/skill_prefetch/__init__.py +4 -0
  186. langchain_agentx/task_runtime/skill_prefetch/attachments.py +46 -0
  187. langchain_agentx/task_runtime/skill_prefetch/models.py +37 -0
  188. langchain_agentx/task_runtime/skill_prefetch/provider.py +344 -0
  189. langchain_agentx/task_runtime/store/__init__.py +6 -0
  190. langchain_agentx/task_runtime/store/in_memory.py +81 -0
  191. langchain_agentx/task_runtime/store/sqlite_store.py +281 -0
  192. langchain_agentx/task_runtime/tasks/__init__.py +76 -0
  193. langchain_agentx/task_runtime/tasks/ai_analysis/__init__.py +15 -0
  194. langchain_agentx/task_runtime/tasks/ai_analysis/base.py +41 -0
  195. langchain_agentx/task_runtime/tasks/ai_analysis/evaluation.py +67 -0
  196. langchain_agentx/task_runtime/tasks/ai_analysis/registry.py +36 -0
  197. langchain_agentx/task_runtime/tasks/ai_analysis/scheduler.py +70 -0
  198. langchain_agentx/task_runtime/tasks/base/__init__.py +6 -0
  199. langchain_agentx/task_runtime/tasks/base/contracts.py +24 -0
  200. langchain_agentx/task_runtime/tasks/custom/__init__.py +7 -0
  201. langchain_agentx/task_runtime/tasks/custom/executor.py +60 -0
  202. langchain_agentx/task_runtime/tasks/custom/notification.py +7 -0
  203. langchain_agentx/task_runtime/tasks/custom/semantics.py +13 -0
  204. langchain_agentx/task_runtime/tasks/custom/spec.py +33 -0
  205. langchain_agentx/task_runtime/tasks/dream_task/__init__.py +15 -0
  206. langchain_agentx/task_runtime/tasks/dream_task/executor.py +61 -0
  207. langchain_agentx/task_runtime/tasks/dream_task/notification.py +19 -0
  208. langchain_agentx/task_runtime/tasks/dream_task/semantics.py +13 -0
  209. langchain_agentx/task_runtime/tasks/dream_task/spec.py +35 -0
  210. langchain_agentx/task_runtime/tasks/dream_task/state.py +17 -0
  211. langchain_agentx/task_runtime/tasks/in_process_teammate/__init__.py +12 -0
  212. langchain_agentx/task_runtime/tasks/in_process_teammate/executor.py +36 -0
  213. langchain_agentx/task_runtime/tasks/in_process_teammate/notification.py +25 -0
  214. langchain_agentx/task_runtime/tasks/in_process_teammate/semantics.py +13 -0
  215. langchain_agentx/task_runtime/tasks/in_process_teammate/spec.py +63 -0
  216. langchain_agentx/task_runtime/tasks/local_agent/__init__.py +14 -0
  217. langchain_agentx/task_runtime/tasks/local_agent/executor.py +33 -0
  218. langchain_agentx/task_runtime/tasks/local_agent/notification.py +21 -0
  219. langchain_agentx/task_runtime/tasks/local_agent/runner.py +43 -0
  220. langchain_agentx/task_runtime/tasks/local_agent/semantics.py +13 -0
  221. langchain_agentx/task_runtime/tasks/local_agent/spec.py +31 -0
  222. langchain_agentx/task_runtime/tasks/local_bash/__init__.py +13 -0
  223. langchain_agentx/task_runtime/tasks/local_bash/executor.py +95 -0
  224. langchain_agentx/task_runtime/tasks/local_bash/notification.py +22 -0
  225. langchain_agentx/task_runtime/tasks/local_bash/semantics.py +13 -0
  226. langchain_agentx/task_runtime/tasks/local_bash/spec.py +55 -0
  227. langchain_agentx/task_runtime/tasks/remote_agent/__init__.py +19 -0
  228. langchain_agentx/task_runtime/tasks/remote_agent/backend.py +76 -0
  229. langchain_agentx/task_runtime/tasks/remote_agent/executor.py +37 -0
  230. langchain_agentx/task_runtime/tasks/remote_agent/notification.py +22 -0
  231. langchain_agentx/task_runtime/tasks/remote_agent/semantics.py +13 -0
  232. langchain_agentx/task_runtime/tasks/remote_agent/spec.py +34 -0
  233. langchain_agentx/task_runtime/tasks/trace_cleanup/__init__.py +19 -0
  234. langchain_agentx/task_runtime/tasks/trace_cleanup/bootstrap.py +95 -0
  235. langchain_agentx/task_runtime/tasks/trace_cleanup/executor.py +66 -0
  236. langchain_agentx/task_runtime/tasks/trace_cleanup/scheduler.py +169 -0
  237. langchain_agentx/tool_runtime/__init__.py +90 -0
  238. langchain_agentx/tool_runtime/adapter.py +365 -0
  239. langchain_agentx/tool_runtime/base.py +319 -0
  240. langchain_agentx/tool_runtime/errors.py +190 -0
  241. langchain_agentx/tool_runtime/identical_call_cache.py +110 -0
  242. langchain_agentx/tool_runtime/loader.py +195 -0
  243. langchain_agentx/tool_runtime/models.py +260 -0
  244. langchain_agentx/tool_runtime/permission_context.py +78 -0
  245. langchain_agentx/tool_runtime/pipeline.py +621 -0
  246. langchain_agentx/tool_runtime/policy.py +447 -0
  247. langchain_agentx/tool_runtime/registry.py +81 -0
  248. langchain_agentx/tool_runtime/resolvers/__init__.py +27 -0
  249. langchain_agentx/tool_runtime/resolvers/agent_session.py +125 -0
  250. langchain_agentx/tool_runtime/resolvers/background.py +32 -0
  251. langchain_agentx/tool_runtime/resolvers/base.py +20 -0
  252. langchain_agentx/tool_runtime/resolvers/conversation.py +22 -0
  253. langchain_agentx/tool_runtime/resolvers/workflow.py +73 -0
  254. langchain_agentx/tool_runtime/session_store.py +132 -0
  255. langchain_agentx/tool_runtime/smoke_test_runtime.py +294 -0
  256. langchain_agentx/tool_runtime/state_bridge.py +164 -0
  257. langchain_agentx/tools/__init__.py +26 -0
  258. langchain_agentx/tools/agent/__init__.py +9 -0
  259. langchain_agentx/tools/agent/backend.py +53 -0
  260. langchain_agentx/tools/agent/built_in/__init__.py +19 -0
  261. langchain_agentx/tools/agent/built_in/agentx_guide.py +65 -0
  262. langchain_agentx/tools/agent/built_in/explore.py +80 -0
  263. langchain_agentx/tools/agent/built_in/general.py +57 -0
  264. langchain_agentx/tools/agent/built_in/plan.py +89 -0
  265. langchain_agentx/tools/agent/built_in/statusline_setup.py +64 -0
  266. langchain_agentx/tools/agent/built_in/verification.py +120 -0
  267. langchain_agentx/tools/agent/builtin_subagent_loader.py +89 -0
  268. langchain_agentx/tools/agent/cwd_resolution.py +119 -0
  269. langchain_agentx/tools/agent/limits.py +26 -0
  270. langchain_agentx/tools/agent/loader.py +270 -0
  271. langchain_agentx/tools/agent/models.py +85 -0
  272. langchain_agentx/tools/agent/prompt.py +120 -0
  273. langchain_agentx/tools/agent/registry/__init__.py +18 -0
  274. langchain_agentx/tools/agent/registry/config.py +29 -0
  275. langchain_agentx/tools/agent/registry/registry.py +47 -0
  276. langchain_agentx/tools/agent/scope.py +137 -0
  277. langchain_agentx/tools/agent/tool.py +256 -0
  278. langchain_agentx/tools/bash/__init__.py +9 -0
  279. langchain_agentx/tools/bash/ast_security.py +571 -0
  280. langchain_agentx/tools/bash/backend.py +1447 -0
  281. langchain_agentx/tools/bash/bash_hardening.py +734 -0
  282. langchain_agentx/tools/bash/bash_runtime_contract.py +41 -0
  283. langchain_agentx/tools/bash/cwd_reporter.py +95 -0
  284. langchain_agentx/tools/bash/limits.py +71 -0
  285. langchain_agentx/tools/bash/mode_validation.py +282 -0
  286. langchain_agentx/tools/bash/models.py +131 -0
  287. langchain_agentx/tools/bash/observability.py +148 -0
  288. langchain_agentx/tools/bash/output_utils.py +200 -0
  289. langchain_agentx/tools/bash/path_security.py +2429 -0
  290. langchain_agentx/tools/bash/prompt.py +68 -0
  291. langchain_agentx/tools/bash/read_only_validation.py +589 -0
  292. langchain_agentx/tools/bash/result_presenter.py +324 -0
  293. langchain_agentx/tools/bash/sandbox_decision.py +133 -0
  294. langchain_agentx/tools/bash/security.py +311 -0
  295. langchain_agentx/tools/bash/sed_edit_parser.py +243 -0
  296. langchain_agentx/tools/bash/sed_validation.py +163 -0
  297. langchain_agentx/tools/bash/semantics.py +111 -0
  298. langchain_agentx/tools/bash/session_manager.py +205 -0
  299. langchain_agentx/tools/bash/session_runtime.py +290 -0
  300. langchain_agentx/tools/bash/shell_locator.py +191 -0
  301. langchain_agentx/tools/bash/task_runtime.py +91 -0
  302. langchain_agentx/tools/bash/tool.py +939 -0
  303. langchain_agentx/tools/bash/windows_shell_quoting.py +45 -0
  304. langchain_agentx/tools/glob/__init__.py +9 -0
  305. langchain_agentx/tools/glob/models.py +57 -0
  306. langchain_agentx/tools/glob/pagination.py +30 -0
  307. langchain_agentx/tools/glob/prompt.py +24 -0
  308. langchain_agentx/tools/glob/rg_list_backend.py +139 -0
  309. langchain_agentx/tools/glob/rg_pattern.py +44 -0
  310. langchain_agentx/tools/glob/tool.py +327 -0
  311. langchain_agentx/tools/grep/__init__.py +7 -0
  312. langchain_agentx/tools/grep/backend.py +375 -0
  313. langchain_agentx/tools/grep/models.py +127 -0
  314. langchain_agentx/tools/grep/prompt.py +30 -0
  315. langchain_agentx/tools/grep/rg_subprocess_controller.py +114 -0
  316. langchain_agentx/tools/grep/tool.py +475 -0
  317. langchain_agentx/tools/read/__init__.py +9 -0
  318. langchain_agentx/tools/read/backend.py +415 -0
  319. langchain_agentx/tools/read/limits.py +67 -0
  320. langchain_agentx/tools/read/models.py +156 -0
  321. langchain_agentx/tools/read/prompt.py +73 -0
  322. langchain_agentx/tools/read/tool.py +494 -0
  323. langchain_agentx/tools/ripgrep_plugin_exclusions.py +137 -0
  324. langchain_agentx/tools/skill/__init__.py +4 -0
  325. langchain_agentx/tools/skill/argument_substitution.py +80 -0
  326. langchain_agentx/tools/skill/loader.py +196 -0
  327. langchain_agentx/tools/skill/models.py +88 -0
  328. langchain_agentx/tools/skill/policy.py +80 -0
  329. langchain_agentx/tools/skill/prompt.py +35 -0
  330. langchain_agentx/tools/skill/tool.py +222 -0
  331. langchain_agentx/utils/__init__.py +0 -0
  332. langchain_agentx/utils/cwd.py +124 -0
  333. langchain_agentx/utils/host_platform.py +112 -0
  334. langchain_agentx/utils/path_hierarchy.py +48 -0
  335. langchain_agentx/utils/path_user_input.py +66 -0
  336. langchain_agentx/utils/rg_executable.py +18 -0
  337. langchain_agentx/utils/subprocess_text.py +101 -0
  338. langchain_agentx/utils/temp_paths.py +77 -0
  339. langchain_agentx/utils/unc_path.py +25 -0
  340. langchain_agentx/utils/win_reserved_paths.py +51 -0
  341. langchain_agentx/workflow/__init__.py +7 -0
  342. langchain_agentx/workflow/base.py +97 -0
  343. langchain_agentx/workflow/batch.py +55 -0
  344. langchain_agentx/workflow/dag.py +54 -0
  345. langchain_agentx/workspace/__init__.py +13 -0
  346. langchain_agentx/workspace/config.py +140 -0
  347. langchain_agentx/workspace/path_key_normalizer.py +30 -0
  348. langchain_agentx/workspace/resolver.py +74 -0
  349. langchain_agentx/workspace/validators.py +41 -0
  350. langchain_agentx_python-0.1.dist-info/LICENSE +201 -0
  351. langchain_agentx_python-0.1.dist-info/METADATA +513 -0
  352. langchain_agentx_python-0.1.dist-info/RECORD +354 -0
  353. langchain_agentx_python-0.1.dist-info/WHEEL +5 -0
  354. langchain_agentx_python-0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2429 @@
1
+ """
2
+ tools/bash/path_security.py — BashRuntimeTool v2 路径安全分析
3
+
4
+ 职责:
5
+ 对照 CC `pathValidation.ts`,为 bash 子命令提供:
6
+ - 路径参数提取
7
+ - `--` 结束选项语义
8
+ - 命令级路径操作类型识别(read / write / create)
9
+ - 危险删除路径检查
10
+ - compound command 中 `cd + write` 的保守审批
11
+ - 调用 PolicyEngine 的路径级授权
12
+
13
+ 设计原则:
14
+ - 保持 OOP 风格,与 `BashAstAnalyzer` 配套
15
+ - 只迁移高价值、高风险的 v2 能力,不直接复制 CC 全量 1300+ 行实现
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ import re
22
+ import shlex
23
+ from dataclasses import dataclass
24
+ from typing import Literal
25
+
26
+ from langchain_agentx.tool_runtime.models import AuthorizationDecision
27
+
28
+ from .ast_security import BashAstAnalysis, BashWrapperStripper
29
+
30
+
31
+ OperationType = Literal["read", "write", "create"]
32
+
33
+
34
@dataclass(frozen=True)
class BashPathTarget:
    """One path referenced by a bash sub-command, tagged with how it is used.

    Instances are immutable (frozen dataclass), so they are hashable and can
    be stored in tuples on `BashCommandPathInfo`.
    """

    # Path text as supplied by the extractor that built this target.
    raw_path: str
    # Kind of access performed on the path: "read", "write" or "create".
    operation_type: OperationType
    # Origin of the path: a command argument ("argv", the default) or an
    # output redirection ("redirection").
    target_source: Literal["argv", "redirection"] = "argv"
39
+
40
+
41
@dataclass(frozen=True)
class BashCommandPathInfo:
    """Immutable summary of the paths a single bash sub-command touches."""

    # Name of the command the targets were extracted for.
    base_command: str
    # Overall access kind attributed to the command.
    operation_type: OperationType
    # Every path the command was found to reference.
    path_targets: tuple[BashPathTarget, ...]
    # True when the extractor could not determine the affected location and
    # therefore wants an explicit approval instead of automatic handling.
    requires_manual_approval: bool = False
    # Human-readable explanation accompanying `requires_manual_approval`.
    manual_approval_reason: str | None = None
48
+
49
+
50
@dataclass(frozen=True)
class BashCompressionCommandSpec:
    """Immutable description of a compression command's file-handling flags.

    NOTE(review): the consumers of this spec are outside this excerpt; the
    per-field notes below are inferred from the field names and defaults and
    should be confirmed against the code that reads them.
    """

    # Presumably the suffix of the file the command produces (e.g. ".gz").
    output_suffix: str
    # Presumably whether the source file is replaced/removed by default.
    source_mutates_by_default: bool = True
    # Presumably flags that name an explicit output file.
    explicit_output_flags: tuple[str, ...] = ()
    # Presumably flags that send the result to stdout instead of a file.
    stdout_flags: tuple[str, ...] = ("-c", "--stdout", "--to-stdout")
    # Presumably flags that keep the source file untouched.
    keep_source_flags: tuple[str, ...] = ("-k", "--keep")
    # Presumably whether a second positional argument names the output.
    second_positional_output: bool = False
58
+
59
+
60
class BashArgumentTokenizer:
    """Split command text into arguments and apply `--` end-of-options rules."""

    # Options that carry the search pattern themselves; once one is seen, no
    # positional argument is consumed as the pattern.
    _PATTERN_OPTIONS = frozenset({"-e", "--regexp", "-f", "--file"})

    def split(self, command_text: str) -> list[str]:
        """Tokenize `command_text` with shell quoting rules.

        Falls back to plain whitespace splitting when `shlex` rejects the
        text (for example an unterminated quote).
        """
        try:
            tokens = shlex.split(command_text)
        except ValueError:
            tokens = command_text.split()
        return tokens

    def filter_out_flags(self, args: list[str]) -> list[str]:
        """Return the non-option arguments of `args`.

        Arguments starting with `-` are dropped until the first `--`; the
        `--` itself is dropped and everything after it is kept verbatim.
        """
        if "--" not in args:
            return [item for item in args if not item.startswith("-")]
        cut = args.index("--")
        before = [item for item in args[:cut] if not item.startswith("-")]
        return before + args[cut + 1:]

    def parse_pattern_command(
        self,
        args: list[str],
        flags_with_args: set[str],
        defaults: list[str] | None = None,
    ) -> list[str]:
        """Extract path operands from a "PATTERN [PATH...]" style command.

        Args:
            args: Arguments following the command name.
            flags_with_args: Options whose value is the following argument.
            defaults: Returned when no path operand is found.

        Returns:
            The path operands, or `defaults` (or an empty list) if none.
        """
        found: list[str] = []
        pattern_seen = False
        options_open = True
        remaining = iter(args)
        for token in remaining:
            if not token:
                continue
            if options_open:
                if token == "--":
                    options_open = False
                    continue
                if token.startswith("-"):
                    name, has_inline_value, _ = token.partition("=")
                    if name in self._PATTERN_OPTIONS:
                        pattern_seen = True
                    if name in flags_with_args and not has_inline_value:
                        # The option's value is the next argument; skip it.
                        next(remaining, None)
                    continue
            if pattern_seen:
                found.append(token)
            else:
                # The first positional argument is the pattern, not a path.
                pattern_seen = True
        return found or defaults or []
119
+
120
+
121
class BashCommandPathInfoFactory:
    """Build `BashCommandPathInfo` objects and attach redirection targets."""

    def build(
        self,
        *,
        base_command: str,
        operation_type: OperationType,
        path_targets: list[BashPathTarget],
        analysis: BashAstAnalysis,
        requires_manual_approval: bool = False,
        manual_approval_reason: str | None = None,
    ) -> BashCommandPathInfo:
        """Create the path info for one sub-command.

        Args:
            base_command: Name of the command being described.
            operation_type: Access kind derived from the command's arguments.
            path_targets: Targets extracted from the arguments. The list is
                not modified.
            analysis: AST analysis of the command; its
                `output_redirection_paths` are appended as write targets.
            requires_manual_approval: Forwarded unchanged to the result.
            manual_approval_reason: Forwarded unchanged to the result.

        Returns:
            The assembled `BashCommandPathInfo`. When the command has output
            redirections its operation type is forced to "write".
        """
        # Copy first: extending the caller's list in place would leak the
        # redirection targets into any later use of that list.
        combined = list(path_targets)
        redirections = analysis.output_redirection_paths
        if redirections:
            combined.extend(
                BashPathTarget(path, "write", target_source="redirection")
                for path in redirections
            )
            # Any output redirection makes the command a writer overall.
            operation_type = "write"
        return BashCommandPathInfo(
            base_command=base_command,
            operation_type=operation_type,
            path_targets=tuple(combined),
            requires_manual_approval=requires_manual_approval,
            manual_approval_reason=manual_approval_reason,
        )
147
+
148
+
149
class BashRemotePathClassifier:
    """Tell remote endpoints (scp / rclone style) apart from local paths."""

    # Leading characters that mark an operand as local (absolute/relative
    # paths, tilde forms, variable expansions, quotes, brace/bracket globs).
    _LOCAL_PREFIXES = ("/", "./", "../", "~/", "~+", "~-", "$", "'", '"', "{", "[")

    def is_remote_spec(self, value: str) -> bool:
        """Return True when `value` looks like a remote endpoint.

        An operand is remote when it is an `scp://` URL, starts with `:` and
        contains at least two colons, or contains a colon without looking
        like a local path (local prefix or Windows drive letter).
        """
        if not value:
            return False
        if value.startswith("scp://"):
            return True
        leading_colon_remote = value.startswith(":") and value.count(":") >= 2
        if leading_colon_remote:
            return True
        looks_local = (
            value.startswith(self._LOCAL_PREFIXES)
            or re.match(r"^[A-Za-z]:[\\/]", value) is not None
        )
        return not looks_local and ":" in value

    def build_local_target(
        self,
        value: str,
        operation_type: OperationType,
    ) -> BashPathTarget | None:
        """Wrap a local operand in a `BashPathTarget`; return None if remote."""
        return None if self.is_remote_spec(value) else BashPathTarget(value, operation_type)
175
+
176
+
177
+ class BashSpecialPathExtractor:
178
+ """
179
+ 聚合特殊命令的路径提取逻辑。
180
+
181
+ 这样 `BashPathExtractorRegistry` 负责分发,复杂命令解析交给专门协作者。
182
+ """
183
+
184
def __init__(
    self,
    tokenizer: BashArgumentTokenizer,
    path_info_factory: BashCommandPathInfoFactory,
    remote_classifier: BashRemotePathClassifier | None = None,
) -> None:
    """Store the collaborators used by the `extract_*` methods.

    Args:
        tokenizer: Provides flag filtering for argument lists.
        path_info_factory: Builds the resulting `BashCommandPathInfo`.
        remote_classifier: Remote-endpoint detector; a default
            `BashRemotePathClassifier` is created when omitted.
    """
    if not remote_classifier:
        remote_classifier = BashRemotePathClassifier()
    self._remote_classifier = remote_classifier
    self._path_info_factory = path_info_factory
    self._tokenizer = tokenizer
193
+
194
def extract_mktemp(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Determine where an `mktemp` invocation creates its file or directory.

    Option handling follows GNU mktemp:
    - `-p DIR` and `--tmpdir=DIR` name the directory the template is
      resolved against.
    - A bare `--tmpdir` (or an empty value) takes NO following argument;
      the directory then comes from `$TMPDIR`, which cannot be resolved
      here.
    - Other options are skipped; `--` ends option parsing.
    - The last remaining operand is the template.

    Args:
        base_command: Command name to record on the result.
        args: Arguments following the command name.
        analysis: AST analysis forwarded to the path-info factory.

    Returns:
        A "create" path info. Manual approval is requested whenever the
        created location cannot be pinned down from the arguments alone.
    """
    target_directory: str | None = None
    template_path: str | None = None
    uses_default_tmpdir = False
    options_open = True
    remaining = iter(args)
    for arg in remaining:
        if not arg:
            continue
        if options_open:
            if arg == "--":
                options_open = False
                continue
            if arg.startswith("--tmpdir="):
                # An empty value behaves like a bare --tmpdir.
                target_directory = arg.split("=", 1)[1] or None
                uses_default_tmpdir = target_directory is None
                continue
            if arg == "--tmpdir":
                # The long option only accepts its value via "=", so the
                # next token is the template, not the directory.
                target_directory = None
                uses_default_tmpdir = True
                continue
            if arg == "-p":
                following = next(remaining, None)
                if following is not None:
                    target_directory = following or None
                    uses_default_tmpdir = target_directory is None
                continue
            if arg.startswith("-"):
                continue
        template_path = arg

    path_targets: list[BashPathTarget] = []
    manual_reason: str | None = None
    if target_directory:
        if template_path and "/" in template_path:
            # The template is joined onto the directory, so components
            # such as "../" can move the result outside of it.
            manual_reason = (
                "mktemp with a target directory and a template containing a path "
                "separator requires explicit approval because the created location "
                "may fall outside the target directory."
            )
        else:
            path_targets.append(BashPathTarget(target_directory, "create"))
    elif uses_default_tmpdir:
        manual_reason = (
            "mktemp --tmpdir without an explicit directory requires explicit approval "
            "because the created location depends on the TMPDIR environment variable."
        )
    elif template_path and ("/" in template_path or template_path.startswith(".")):
        path_targets.append(BashPathTarget(template_path, "create"))
    else:
        manual_reason = (
            "mktemp without an explicit target directory or path requires explicit approval "
            "because the created location cannot be determined safely."
        )

    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="create",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=manual_reason is not None,
        manual_approval_reason=manual_reason,
    )
255
+
256
def extract_tee(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Treat every non-option argument of `tee` as a file being written."""
    write_targets: list[BashPathTarget] = []
    for destination in self._tokenizer.filter_out_flags(args):
        write_targets.append(BashPathTarget(destination, "write"))
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=write_targets,
        analysis=analysis,
    )
270
+
271
def extract_install(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the operands of an `install` invocation.

    Recognized forms (GNU install):
    - `install -d DIR...`: every operand is a directory to create.
    - `install -t DIR SRC...` / `-tDIR` / `--target-directory=DIR`:
      operands are read, `DIR` is written.
    - `install SRC... DEST`: all but the last operand are read, the last
      one is written.

    Anything else (for example a single operand without `-d`/`-t`)
    requests manual approval because the destination is unknown.

    Args:
        base_command: Command name to record on the result.
        args: Arguments following the command name.
        analysis: AST analysis forwarded to the path-info factory.

    Returns:
        The path info describing the read / write / create targets.
    """
    # Options (other than -t) whose value is the following argument.
    value_flags = {
        "-m", "--mode", "-o", "--owner", "-g", "--group", "-S", "--suffix",
    }
    positional: list[str] = []
    target_directory: str | None = None
    directory_mode = False
    options_open = True
    remaining = iter(args)
    for arg in remaining:
        if not arg:
            continue
        if options_open and arg == "--":
            options_open = False
            continue
        if not options_open or not arg.startswith("-"):
            positional.append(arg)
            continue
        if arg.startswith("--target-directory="):
            target_directory = arg.split("=", 1)[1]
            continue
        if arg in ("-t", "--target-directory"):
            following = next(remaining, None)
            if following is not None:
                target_directory = following
            continue
        if arg.startswith("-t") and not arg.startswith("--"):
            # Attached short form: "-tDIR".
            target_directory = arg[2:]
            continue
        if arg in ("-d", "--directory"):
            directory_mode = True
            continue
        if arg in value_flags:
            # Skip the option's value so it is not mistaken for an operand.
            next(remaining, None)
        # Every other option (including -T / --no-target-directory) does
        # not change which operand is the destination.

    path_targets: list[BashPathTarget] = []
    operation_type: OperationType = "write"
    needs_approval = False
    if directory_mode:
        if positional and not target_directory:
            # With -d all operands are directories that get created.
            path_targets.extend(BashPathTarget(path, "create") for path in positional)
            operation_type = "create"
        else:
            needs_approval = True
    elif target_directory:
        path_targets.extend(BashPathTarget(path, "read") for path in positional)
        path_targets.append(BashPathTarget(target_directory, "write"))
    elif len(positional) >= 2:
        path_targets.extend(BashPathTarget(path, "read") for path in positional[:-1])
        path_targets.append(BashPathTarget(positional[-1], "write"))
    else:
        needs_approval = True

    if needs_approval:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "install command requires explicit approval when the destination path "
                "cannot be determined safely."
            ),
        )
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type=operation_type,
        path_targets=path_targets,
        analysis=analysis,
    )
347
+
348
def extract_ln(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by an ``ln`` invocation.

    Link sources are reported as ``read`` targets. The link location (the
    ``-t``/``--target-directory`` value, or otherwise the last operand) is
    reported as the ``write`` target. When no destination can be identified
    the command is flagged for manual approval.

    Short options are parsed as a cluster, so ``-st DIR`` and ``-tDIR`` are
    recognised as target-directory forms instead of leaving ``DIR`` among
    the operands or dropping it.

    Args:
        base_command: Command name forwarded unchanged to the factory.
        args: Arguments following the command name.
        analysis: AST analysis forwarded unchanged to the factory.

    Returns:
        The path info built by ``self._path_info_factory``.
    """
    positional: list[str] = []
    target_directory: str | None = None
    after_double_dash = False
    i = 0
    while i < len(args):
        arg = args[i]
        i += 1  # from here on, args[i] is the token *after* ``arg``
        if not arg:
            continue
        if after_double_dash or not arg.startswith("-"):
            positional.append(arg)
            continue
        if arg == "--":
            after_double_dash = True
            continue

        if arg.startswith("--"):
            name, has_value, value = arg.partition("=")
            if name == "--target-directory":
                if has_value:
                    target_directory = value
                elif i < len(args):
                    target_directory = args[i]
                    i += 1
            elif name == "--suffix" and not has_value and i < len(args):
                # The suffix value is not a path; just skip over it.
                i += 1
            continue

        # Short option cluster: ``t`` and ``S`` take a value, which is either
        # the remainder of the cluster or the following argument.
        letters = arg[1:]
        for position, letter in enumerate(letters):
            if letter not in ("t", "S"):
                continue
            value = letters[position + 1:]
            if not value and i < len(args):
                value = args[i]
                i += 1
            if letter == "t" and value:
                target_directory = value
            break

    if target_directory:
        sources, destination = positional, target_directory
    elif len(positional) >= 2:
        sources, destination = positional[:-1], positional[-1]
    else:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=[],
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "ln command requires explicit approval when the destination path "
                "cannot be determined safely."
            ),
        )

    path_targets: list[BashPathTarget] = [
        BashPathTarget(path, "read") for path in sources
    ]
    path_targets.append(BashPathTarget(destination, "write"))
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
    )
416
+
417
def extract_tar(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by a ``tar`` invocation.

    Recognised modes are create (``-c``/``--create``), extract
    (``-x``/``--extract``/``--get``) and list (``-t``/``--list``):

    * extract: archive is read, ``-C`` directory is written, and manual
      approval is always required;
    * create: archive is written, ``-C`` directory and operands are read;
    * list: archive is read.

    Any other invocation (append, update, delete, diff, or an unrecognised
    mode) is flagged for manual approval. Such commands may modify the
    archive in place, so they must not be reported as plain reads.

    Args:
        base_command: Command name forwarded unchanged to the factory.
        args: Arguments following the command name.
        analysis: AST analysis forwarded unchanged to the factory.

    Returns:
        The path info built by ``self._path_info_factory``.
    """
    archive_path: str | None = None
    chdir_path: str | None = None
    positional: list[str] = []
    after_double_dash = False
    create_mode = False
    extract_mode = False
    list_mode = False
    i = 0
    while i < len(args):
        arg = args[i]
        if not arg:
            i += 1
            continue
        # A lone "-" is an operand, not an option.
        if after_double_dash or arg == "-" or not arg.startswith("-"):
            positional.append(arg)
            i += 1
            continue
        if arg == "--":
            after_double_dash = True
            i += 1
            continue

        has_next = i + 1 < len(args)
        if arg.startswith("--"):
            if arg.startswith("--file="):
                archive_path = arg.split("=", 1)[1]
            elif arg.startswith("--directory="):
                chdir_path = arg.split("=", 1)[1]
            elif arg == "--create":
                create_mode = True
            elif arg in ("--extract", "--get"):
                extract_mode = True
            elif arg == "--list":
                list_mode = True
            elif arg == "--file" and has_next:
                archive_path = args[i + 1]
                i += 1
            elif arg == "--directory" and has_next:
                chdir_path = args[i + 1]
                i += 1
            i += 1
            continue

        # Short option cluster such as "-xzf".
        if "c" in arg:
            create_mode = True
        if "x" in arg:
            extract_mode = True
        if "t" in arg:
            list_mode = True
        if "f" in arg:
            attached = self._extract_attached_short_option_value(arg, "f")
            if attached:
                archive_path = attached
            elif has_next:
                archive_path = args[i + 1]
                i += 1
        elif arg == "-C" and has_next:
            chdir_path = args[i + 1]
            i += 1
        i += 1

    path_targets: list[BashPathTarget] = []

    if extract_mode:
        if archive_path:
            path_targets.append(BashPathTarget(archive_path, "read"))
        if chdir_path:
            path_targets.append(BashPathTarget(chdir_path, "write"))
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "tar extract commands require explicit approval because they can "
                "materialize many files that are not explicit command arguments."
            ),
        )

    if archive_path:
        path_targets.append(
            BashPathTarget(archive_path, "write" if create_mode else "read")
        )
    if chdir_path:
        path_targets.append(
            BashPathTarget(chdir_path, "read" if create_mode else "write")
        )

    if create_mode:
        path_targets.extend(BashPathTarget(path, "read") for path in positional)
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
        )

    if list_mode and archive_path:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="read",
            path_targets=path_targets,
            analysis=analysis,
        )

    # No recognised mode: the command may append to, update or delete from
    # the archive, so it is never reported as a read.
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "tar command requires explicit approval when its read/write targets "
            "cannot be determined safely."
        ),
    )
546
+
547
def extract_dd(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Derive path targets from the ``if=`` and ``of=`` operands of ``dd``.

    ``if=`` values become ``read`` targets and ``of=`` values become
    ``write`` targets; every other operand is ignored. A write whose raw
    path starts with ``/dev/`` is flagged for manual approval. The overall
    operation is ``write`` when any write target exists, otherwise ``read``.
    """
    roles_by_key = {"if": "read", "of": "write"}
    path_targets: list[BashPathTarget] = []
    for operand in args:
        if not operand:
            continue
        key, separator, value = operand.partition("=")
        if not separator or not value:
            continue
        role = roles_by_key.get(key)
        if role is not None:
            path_targets.append(BashPathTarget(value, role))

    written = [
        target for target in path_targets if target.operation_type == "write"
    ]

    if any(target.raw_path.startswith("/dev/") for target in written):
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "dd writing directly to a device path requires explicit approval."
            ),
        )

    overall: OperationType = "write" if written else "read"
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type=overall,
        path_targets=path_targets,
        analysis=analysis,
    )
588
+
589
def extract_rsync(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by an ``rsync`` invocation.

    All operands except the last are sources; the last operand is the
    destination and is reported as a ``write`` target. Sources are ``read``
    targets unless ``--remove-source-files`` is given, in which case they
    are reported as ``write`` because rsync deletes them after transfer.

    Manual approval is required when:

    * a long flag that carries an additional filesystem path is present;
    * a short option cluster contains a value-taking letter, because the
      value cannot be separated from the operands reliably;
    * fewer than two operands remain.

    Values of known non-path long flags (``--exclude PATTERN`` and similar)
    are skipped so they are not mistaken for the source or destination.

    NOTE(review): operands are not checked for remote ``host:path`` syntax
    here; confirm whether the caller handles that.

    Args:
        base_command: Command name forwarded unchanged to the factory.
        args: Arguments following the command name.
        analysis: AST analysis forwarded unchanged to the factory.

    Returns:
        The path info built by ``self._path_info_factory``.
    """
    # Long flags whose value names another file or directory rsync touches.
    risky_path_flags = {
        "--backup-dir", "--compare-dest", "--copy-dest", "--link-dest",
        "--partial-dir", "--temp-dir", "--files-from", "--exclude-from",
        "--include-from", "--log-file", "--password-file", "--write-batch",
        "--only-write-batch", "--read-batch",
    }
    # Long flags whose value is not a local path but may be passed as a
    # separate argument and must not be counted as an operand.
    long_flags_with_values = {
        "--rsh", "--rsync-path", "--suffix", "--chmod", "--chown",
        "--usermap", "--groupmap", "--block-size", "--max-delete",
        "--max-size", "--min-size", "--timeout", "--contimeout",
        "--modify-window", "--compress-level", "--skip-compress",
        "--filter", "--exclude", "--include", "--address", "--port",
        "--sockopts", "--out-format", "--log-file-format", "--bwlimit",
        "--protocol", "--iconv", "--checksum-seed", "--info", "--debug",
        "--remote-option",
    }
    # Short option letters that consume a value (-e, -T, -f, -B, -M).
    short_flags_with_values = ("e", "T", "f", "B", "M")

    def needs_approval(reason: str) -> BashCommandPathInfo:
        """Build the manual-approval result; no targets are known yet."""
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=[],
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=reason,
        )

    positional: list[str] = []
    after_double_dash = False
    sources_removed = False
    i = 0
    while i < len(args):
        arg = args[i]
        i += 1  # from here on, args[i] is the token *after* ``arg``
        if not arg:
            continue
        if after_double_dash or not arg.startswith("-"):
            positional.append(arg)
            continue
        if arg == "--":
            after_double_dash = True
            continue

        if arg.startswith("--"):
            flag, has_value, _ = arg.partition("=")
            if flag in risky_path_flags:
                return needs_approval(
                    "rsync with path-bearing control flags requires explicit approval "
                    "to ensure all filesystem targets are validated."
                )
            if flag == "--remove-source-files":
                sources_removed = True
            elif (
                flag in long_flags_with_values
                and not has_value
                and i < len(args)
            ):
                i += 1  # skip the separately supplied value
            continue

        if any(letter in arg for letter in short_flags_with_values):
            return needs_approval(
                "rsync with complex short flags requires explicit approval "
                "to ensure all filesystem targets are validated."
            )

    if len(positional) < 2:
        return needs_approval(
            "rsync command requires explicit approval when source or destination "
            "cannot be determined safely."
        )

    source_operation: OperationType = "write" if sources_removed else "read"
    path_targets: list[BashPathTarget] = [
        BashPathTarget(path, source_operation) for path in positional[:-1]
    ]
    path_targets.append(BashPathTarget(positional[-1], "write"))
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
    )
679
+
680
def extract_zip(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by a ``zip`` invocation.

    The first operand is the archive and the remaining operands are the
    source files. With ``--out`` the first operand is only read and the
    ``--out`` value is the written archive. Sources count as ``write`` in
    move mode, and are ignored in delete mode. A ``-b``/``--temp-path``
    value is always a ``write`` target.

    Manual approval is required when the inputs cannot be listed from the
    arguments: no operands, names taken from stdin (``-@``), or no source
    operands outside delete mode.
    """
    operands: list[str] = []
    targets: list[BashPathTarget] = []
    temp_path: str | None = None
    out_path: str | None = None
    move_mode = False
    delete_mode = False
    names_from_stdin = False
    options_ended = False

    total = len(args)
    idx = 0
    while idx < total:
        token = args[idx]
        step = 1
        if not token:
            pass
        elif options_ended or token == "-" or not token.startswith("-"):
            operands.append(token)
        elif token == "--":
            options_ended = True
        elif token.startswith("--temp-path="):
            temp_path = token.split("=", 1)[1]
        elif token.startswith("--out="):
            out_path = token.split("=", 1)[1]
        elif token == "--move":
            move_mode = True
        elif token == "--delete":
            delete_mode = True
        elif token in ("-b", "--temp-path") and idx + 1 < total:
            temp_path = args[idx + 1]
            step = 2
        elif token == "--out" and idx + 1 < total:
            out_path = args[idx + 1]
            step = 2
        elif not token.startswith("--"):
            # Short option cluster: look for the letters that matter here.
            if "m" in token:
                move_mode = True
            if "d" in token:
                delete_mode = True
            if "@" in token:
                names_from_stdin = True
            if "b" in token:
                attached = self._extract_attached_short_option_value(token, "b")
                if attached:
                    temp_path = attached
        idx += step

    build = self._path_info_factory.build
    source_operation: OperationType = "write" if move_mode else "read"

    if temp_path:
        targets.append(BashPathTarget(temp_path, "write"))

    if out_path:
        sources: list[str] = []
        if operands:
            targets.append(BashPathTarget(operands[0], "read"))
            if not delete_mode:
                sources = operands[1:]
        targets.append(BashPathTarget(out_path, "write"))
        targets.extend(BashPathTarget(path, source_operation) for path in sources)
        if not operands or (names_from_stdin and not delete_mode):
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=targets,
                analysis=analysis,
                requires_manual_approval=True,
                manual_approval_reason=(
                    "zip with --out or stdin-provided file lists requires explicit approval "
                    "when the full archive inputs cannot be determined safely."
                ),
            )
        return build(
            base_command=base_command,
            operation_type="write",
            path_targets=targets,
            analysis=analysis,
        )

    if operands:
        targets.append(BashPathTarget(operands[0], "write"))
        if delete_mode:
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=targets,
                analysis=analysis,
            )
        sources = operands[1:]
        targets.extend(BashPathTarget(path, source_operation) for path in sources)
        if sources and not names_from_stdin:
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=targets,
                analysis=analysis,
            )

    return build(
        base_command=base_command,
        operation_type="write",
        path_targets=targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "zip command requires explicit approval when archive inputs, source files, "
            "or generated outputs cannot be determined safely."
        ),
    )
817
+
818
def extract_unzip(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by an ``unzip`` invocation.

    The first operand is the archive (``read``); a ``-d`` value, separate
    or attached, is the extraction directory (``write``). When a short
    option cluster contains one of ``l``, ``t``, ``p``, ``v`` or ``Z`` the
    command is reported as a read. Every other invocation requires manual
    approval.
    """
    read_only_letters = ("l", "t", "p", "v", "Z")
    operands: list[str] = []
    destination_directory: str | None = None
    read_only_mode = False
    options_ended = False

    total = len(args)
    idx = 0
    while idx < total:
        token = args[idx]
        step = 1
        if not token:
            pass
        elif options_ended or token == "-" or not token.startswith("-"):
            operands.append(token)
        elif token == "--":
            options_ended = True
        elif token == "-d" and idx + 1 < total:
            destination_directory = args[idx + 1]
            step = 2
        elif not token.startswith("--"):
            attached = None
            if "d" in token:
                attached = self._extract_attached_short_option_value(token, "d")
            if attached:
                destination_directory = attached
            elif any(letter in token for letter in read_only_letters):
                read_only_mode = True
        idx += step

    build = self._path_info_factory.build
    path_targets: list[BashPathTarget] = []
    if operands:
        path_targets.append(BashPathTarget(operands[0], "read"))
    if destination_directory:
        path_targets.append(BashPathTarget(destination_directory, "write"))

    if not operands:
        return build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "unzip command requires explicit approval when the archive path or "
                "extraction target cannot be determined safely."
            ),
        )

    if read_only_mode:
        return build(
            base_command=base_command,
            operation_type="read",
            path_targets=path_targets,
            analysis=analysis,
        )

    return build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "unzip extraction commands require explicit approval because they can "
            "materialize many files beyond explicit argv paths."
        ),
    )
895
+
896
def extract_7z_family(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by a 7z-style invocation.

    The first non-switch argument is the action (lower-cased), the second
    is the archive, and later non-switch arguments are source paths. A
    ``-o`` value, attached or separate, is the output directory.

    * ``l``/``t``/``i``: read of the archive;
    * ``x``/``e``: write, always with manual approval;
    * ``d``: write to the archive;
    * ``a``/``u``: write to the archive, sources read, with manual approval
      when no sources were given;
    * anything else, or no archive: manual approval.
    """
    action: str | None = None
    archive_path: str | None = None
    destination_directory: str | None = None
    source_paths: list[str] = []

    total = len(args)
    idx = 0
    while idx < total:
        token = args[idx]
        idx += 1  # from here on, args[idx] is the token *after* ``token``
        if not token:
            continue
        is_switch = token.startswith("-")
        if action is None and not is_switch:
            action = token.lower()
        elif token.startswith("-o") and token != "-o":
            destination_directory = token[2:]
        elif token == "-o" and idx < total:
            destination_directory = args[idx]
            idx += 1
        elif not is_switch:
            if archive_path is None:
                archive_path = token
            else:
                source_paths.append(token)

    build = self._path_info_factory.build
    path_targets: list[BashPathTarget] = []
    if destination_directory:
        path_targets.append(BashPathTarget(destination_directory, "write"))

    if archive_path:
        archive_operation: OperationType = (
            "write" if action in {"a", "u", "d"} else "read"
        )
        path_targets.append(BashPathTarget(archive_path, archive_operation))

        if action in {"l", "t", "i"}:
            return build(
                base_command=base_command,
                operation_type="read",
                path_targets=path_targets,
                analysis=analysis,
            )

        if action in {"x", "e"}:
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
                requires_manual_approval=True,
                manual_approval_reason=(
                    f"{base_command} extract commands require explicit approval because they can "
                    "materialize many files beyond explicit argv paths."
                ),
            )

        if action == "d":
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
            )

        if action in {"a", "u"}:
            path_targets.extend(
                BashPathTarget(path, "read") for path in source_paths
            )
            if source_paths:
                reason = None
            else:
                reason = (
                    f"{base_command} requires explicit approval when archive inputs cannot be "
                    "determined safely."
                )
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
                requires_manual_approval=not bool(source_paths),
                manual_approval_reason=reason,
            )

    return build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            f"{base_command} command requires explicit approval when its action, archive path, "
            "or destination cannot be determined safely."
        ),
    )
996
+
997
def extract_zip_metadata(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Report a read of the first non-flag argument as the archive.

    Flags are removed with ``self._tokenizer.filter_out_flags`` and only the
    first remaining argument is used. When nothing remains, the command is
    flagged for manual approval with no path targets.
    """
    candidates = self._tokenizer.filter_out_flags(args)
    archive_targets = [BashPathTarget(path, "read") for path in candidates[:1]]

    if not archive_targets:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="read",
            path_targets=[],
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                f"{base_command} requires explicit approval when the target archive cannot be "
                "determined safely."
            ),
        )

    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="read",
        path_targets=archive_targets,
        analysis=analysis,
    )
1023
+
1024
def extract_single_file_archive(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify paths for single-file (de)compressors such as ``gzip``.

    ``base_command`` selects a ``BashCompressionCommandSpec``; a command
    without a spec raises ``KeyError``. Operands are the input files. The
    output is either given explicitly (an output flag from the spec, or the
    last operand when the spec sets ``second_positional_output``) or
    derived per input via ``self._derive_single_file_archive_output``.

    Inputs are ``read`` when writing to stdout, when the source is kept, or
    when the spec says the source is not mutated by default; otherwise they
    are ``write``. Manual approval is required when there are no operands,
    when an explicit output is combined with other than exactly one input,
    or when an output path cannot be derived.
    """
    output_flags = ("-o", "--output")
    specs = {
        "gzip": BashCompressionCommandSpec(".gz"),
        "gunzip": BashCompressionCommandSpec(".gz"),
        "xz": BashCompressionCommandSpec(".xz"),
        "unxz": BashCompressionCommandSpec(".xz"),
        "bzip2": BashCompressionCommandSpec(".bz2"),
        "bunzip2": BashCompressionCommandSpec(".bz2"),
        "zstd": BashCompressionCommandSpec(".zst", explicit_output_flags=output_flags),
        "unzstd": BashCompressionCommandSpec(".zst", explicit_output_flags=output_flags),
        "lz4": BashCompressionCommandSpec(
            ".lz4",
            source_mutates_by_default=False,
            explicit_output_flags=output_flags,
            second_positional_output=True,
        ),
    }
    spec = specs[base_command]
    build = self._path_info_factory.build

    decompress_mode = base_command in {"gunzip", "unxz", "bunzip2", "unzstd"}
    stdout_mode = False
    keep_source = False
    explicit_output: str | None = None
    inputs: list[str] = []
    options_ended = False

    total = len(args)
    idx = 0
    while idx < total:
        token = args[idx]
        step = 1
        if not token:
            pass
        elif options_ended:
            inputs.append(token)
        elif token == "--":
            options_ended = True
        elif token in spec.stdout_flags:
            stdout_mode = True
        elif token in spec.keep_source_flags:
            keep_source = True
        elif token in {"-d", "--decompress", "--uncompress"}:
            decompress_mode = True
        elif token in {"-z", "--compress"}:
            decompress_mode = False
        elif token in spec.explicit_output_flags and idx + 1 < total:
            explicit_output = args[idx + 1]
            step = 2
        elif token.startswith("-") and token != "-":
            # Only short clusters are inspected; other long flags are skipped.
            if not token.startswith("--"):
                if "c" in token:
                    stdout_mode = True
                if "k" in token:
                    keep_source = True
                if "d" in token:
                    decompress_mode = True
        else:
            inputs.append(token)
        idx += step

    if not inputs:
        return build(
            base_command=base_command,
            operation_type="write",
            path_targets=[],
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                f"{base_command} requires explicit approval when operating on stdin/stdout "
                "without explicit filesystem paths."
            ),
        )

    # Some tools take "INPUT OUTPUT" as two operands.
    if spec.second_positional_output and explicit_output is None and len(inputs) >= 2:
        explicit_output = inputs[-1]
        inputs = inputs[:-1]

    source_untouched = (
        stdout_mode or keep_source or not spec.source_mutates_by_default
    )
    input_operation: OperationType = "read" if source_untouched else "write"
    path_targets = [BashPathTarget(path, input_operation) for path in inputs]

    if stdout_mode:
        return build(
            base_command=base_command,
            operation_type="read",
            path_targets=path_targets,
            analysis=analysis,
        )

    output_paths: list[str] = []
    if explicit_output is None:
        for input_path in inputs:
            derived_output = self._derive_single_file_archive_output(
                input_path=input_path,
                suffix=spec.output_suffix,
                decompress_mode=decompress_mode,
            )
            if derived_output is None:
                return build(
                    base_command=base_command,
                    operation_type="write",
                    path_targets=path_targets,
                    analysis=analysis,
                    requires_manual_approval=True,
                    manual_approval_reason=(
                        f"{base_command} requires explicit approval when the generated output "
                        "path cannot be derived safely."
                    ),
                )
            output_paths.append(derived_output)
    elif len(inputs) == 1:
        output_paths.append(explicit_output)
    else:
        return build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                f"{base_command} with an explicit output path requires explicit approval "
                "when multiple input files are provided."
            ),
        )

    path_targets.extend(BashPathTarget(path, "write") for path in output_paths)
    return build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
    )
1169
+
1170
def extract_jar(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by a ``jar`` invocation.

    The first argument selects the mode. Classic letter clusters (``cf``,
    ``-xvf``) are read letter by letter. GNU-style long modes are mapped
    explicitly (``--create``, ``--update``, ``--extract``, ``--list``)
    rather than searched for letters, so that ``--extract`` is not taken
    for a listing because the word contains a ``t``. Any other long first
    argument yields no mode and therefore manual approval.

    The archive is taken from ``-f``/``--file``/``--file=`` when present.
    Otherwise it is the operand matching the position of ``f`` among the
    ``f``/``m``/``e`` letters of the mode cluster (``cmf MANIFEST JAR``
    puts the archive second), defaulting to the first operand.

    * ``t``: read of the archive;
    * ``x``: write, always with manual approval;
    * ``c``/``u``: write to the archive, remaining operands read;
    * anything else, or no archive: manual approval.

    ``-C DIR PATH`` contributes ``DIR`` (write when extracting, else read)
    and, outside extract mode, ``PATH`` as a read target.

    Args:
        base_command: Command name forwarded unchanged to the factory.
        args: Arguments following the command name.
        analysis: AST analysis forwarded unchanged to the factory.

    Returns:
        The path info built by ``self._path_info_factory``.
    """
    if not args:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=[],
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason="jar requires explicit approval when its mode is missing.",
        )

    long_modes = {"--create": "c", "--update": "u", "--extract": "x", "--list": "t"}
    first = args[0]
    if first.startswith("--"):
        mode_token = long_modes.get(first, "")
    else:
        mode_token = first.lstrip("-")
    extracting = "x" in mode_token
    writes_archive = "c" in mode_token or "u" in mode_token

    positional: list[str] = []
    path_targets: list[BashPathTarget] = []
    explicit_archive: str | None = None
    i = 1
    while i < len(args):
        arg = args[i]
        has_next = i + 1 < len(args)
        if arg == "-C" and has_next:
            path_targets.append(
                BashPathTarget(args[i + 1], "write" if extracting else "read")
            )
            if not extracting and i + 2 < len(args):
                path_targets.append(BashPathTarget(args[i + 2], "read"))
                i += 3
                continue
            i += 2
            continue
        if arg.startswith("--file="):
            explicit_archive = arg.split("=", 1)[1]
            i += 1
            continue
        if arg in ("-f", "--file") and has_next:
            explicit_archive = args[i + 1]
            i += 2
            continue
        if not arg.startswith("-"):
            positional.append(arg)
        i += 1

    if explicit_archive is not None:
        archive_path: str | None = explicit_archive
        other_operands = positional
    else:
        # Operands follow the order of the f/m/e letters in the mode
        # cluster, so count the value-taking letters that precede "f".
        archive_index = 0
        if "f" in mode_token:
            before_f = mode_token.split("f", 1)[0]
            archive_index = sum(1 for letter in before_f if letter in "me")
        if archive_index < len(positional):
            archive_path = positional[archive_index]
            other_operands = (
                positional[:archive_index] + positional[archive_index + 1:]
            )
        else:
            archive_path = None
            other_operands = positional

    if archive_path:
        archive_operation: OperationType = "write" if writes_archive else "read"
        path_targets.insert(0, BashPathTarget(archive_path, archive_operation))

        if "t" in mode_token:
            return self._path_info_factory.build(
                base_command=base_command,
                operation_type="read",
                path_targets=path_targets,
                analysis=analysis,
            )
        if extracting:
            return self._path_info_factory.build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
                requires_manual_approval=True,
                manual_approval_reason=(
                    "jar extract commands require explicit approval because they can materialize "
                    "many files beyond explicit argv paths."
                ),
            )
        if writes_archive:
            path_targets.extend(
                BashPathTarget(path, "read") for path in other_operands
            )
            return self._path_info_factory.build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
            )

    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "jar command requires explicit approval when its archive path or mode cannot be "
            "determined safely."
        ),
    )
1251
+
1252
def extract_rar_family(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """Classify the paths read and written by a rar-style invocation.

    The first argument is the action (lower-cased). Of the remaining
    arguments, the non-empty ones that do not start with ``-`` are
    operands, and the first operand is the archive.

    * ``l``/``lb``/``lt``/``t``/``p``: read of the archive;
    * ``x``/``e``: write with manual approval; a second operand, if any,
      is added as a write target;
    * ``d``: write to the archive;
    * ``a``/``u``: write to the archive, remaining operands read;
    * anything else, no arguments, or no archive: manual approval.
    """
    build = self._path_info_factory.build

    if not args:
        return build(
            base_command=base_command,
            operation_type="write",
            path_targets=[],
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=f"{base_command} requires explicit approval when no mode is provided.",
        )

    action = args[0].lower()
    operands = [
        token for token in args[1:] if token and not token.startswith("-")
    ]
    path_targets: list[BashPathTarget] = []

    if operands:
        archive_operation: OperationType = (
            "write" if action in {"a", "u", "d"} else "read"
        )
        path_targets.append(BashPathTarget(operands[0], archive_operation))

        if action in {"l", "lb", "lt", "t", "p"}:
            return build(
                base_command=base_command,
                operation_type="read",
                path_targets=path_targets,
                analysis=analysis,
            )

        if action in {"x", "e"}:
            if len(operands) >= 2:
                path_targets.append(BashPathTarget(operands[1], "write"))
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
                requires_manual_approval=True,
                manual_approval_reason=(
                    f"{base_command} extract commands require explicit approval because they can "
                    "materialize many files beyond explicit argv paths."
                ),
            )

        if action == "d":
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
            )

        if action in {"a", "u"}:
            path_targets.extend(
                BashPathTarget(path, "read") for path in operands[1:]
            )
            return build(
                base_command=base_command,
                operation_type="write",
                path_targets=path_targets,
                analysis=analysis,
            )

    return build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            f"{base_command} command requires explicit approval when its archive action cannot "
            "be determined safely."
        ),
    )
1324
+
1325
def extract_scp(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """
    Build path info for an ``scp`` invocation.

    Options listed in ``value_options`` consume the following argument; the
    subset in ``file_options`` additionally records that argument as a file
    that is read and forces manual approval. Every remaining argument is an
    endpoint: all but the last are sources (read), the last is the
    destination (write).

    Manual approval is required when fewer than two endpoints are found,
    when the remote classifier yields no local target for an endpoint
    (presumably because the endpoint is remote), or when a file-valued
    option was seen.
    """
    value_options = frozenset({"-P", "-S", "-J", "-F", "-i", "-c", "-l", "-o"})
    file_options = frozenset({"-S", "-F", "-i"})
    endpoints: list[str] = []
    targets: list[BashPathTarget] = []
    saw_transport_file = False

    cursor = 0
    total = len(args)
    while cursor < total:
        token = args[cursor]
        cursor += 1
        if not token:
            continue
        # A lone "-" is not an option; it is kept as an endpoint.
        if token == "-" or not token.startswith("-"):
            endpoints.append(token)
            continue
        # Option: swallow its value only when one is actually present.
        if token in value_options and cursor < total:
            if token in file_options:
                targets.append(BashPathTarget(args[cursor], "read"))
                saw_transport_file = True
            cursor += 1

    if len(endpoints) < 2:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "scp command requires explicit approval when source or destination endpoints "
                "cannot be determined safely."
            ),
        )

    *sources, destination = endpoints
    planned = [(source, "read") for source in sources]
    planned.append((destination, "write"))

    unresolved_endpoint = False
    for endpoint, operation in planned:
        local_target = self._remote_classifier.build_local_target(endpoint, operation)
        if local_target is None:
            unresolved_endpoint = True
        else:
            targets.append(local_target)

    needs_approval = unresolved_endpoint or saw_transport_file
    reason = (
        "scp with remote endpoints or transport-control files requires explicit approval."
        if needs_approval
        else None
    )
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=targets,
        analysis=analysis,
        requires_manual_approval=needs_approval,
        manual_approval_reason=reason,
    )
1397
+
1398
def extract_sftp(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """
    Build path info for an ``sftp`` invocation.

    Values of the options in ``local_file_options`` are recorded as files
    that are read. When at least two positional arguments remain, the first
    is recorded as a read and the second as a write, each only if the remote
    classifier does not consider it a remote spec.

    The result always requires manual approval.
    """
    value_options = frozenset(
        {"-b", "-B", "-c", "-D", "-F", "-i", "-J", "-P", "-R", "-S", "-s", "-X"}
    )
    local_file_options = frozenset({"-b", "-D", "-F", "-i", "-S", "-s"})
    operands: list[str] = []
    targets: list[BashPathTarget] = []

    cursor = 0
    total = len(args)
    while cursor < total:
        token = args[cursor]
        cursor += 1
        if not token:
            continue
        # A lone "-" is not an option; it is kept as an operand.
        if token == "-" or not token.startswith("-"):
            operands.append(token)
            continue
        if token in value_options and cursor < total:
            if token in local_file_options:
                targets.append(BashPathTarget(args[cursor], "read"))
            cursor += 1

    if len(operands) >= 2:
        origin, destination = operands[0], operands[1]
        origin_is_remote = self._remote_classifier.is_remote_spec(origin)
        destination_is_remote = self._remote_classifier.is_remote_spec(destination)
        # Each side is recorded independently: a non-remote origin is read,
        # a non-remote destination is written.
        if not origin_is_remote:
            targets.append(BashPathTarget(origin, "read"))
        if not destination_is_remote:
            targets.append(BashPathTarget(destination, "write"))

    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "sftp commands require explicit approval because remote session semantics and "
            "batch operations cannot be determined safely from argv alone."
        ),
    )
1448
+
1449
def extract_rclone(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """
    Build path info for an ``rclone`` invocation.

    The first argument that does not start with ``-`` is the subcommand.
    Long options in ``risky_path_flags`` force manual approval and their
    value (inline ``--flag=value`` or the following argument) is recorded as
    a file that is read. Other options are skipped without consuming a
    value; everything else is a positional endpoint.

    Endpoints are turned into local targets through the remote classifier;
    an endpoint for which it returns ``None`` (presumably a remote) forces
    manual approval. Destructive subcommands and anything unrecognised
    always require manual approval.
    """
    risky_path_flags = {
        "--backup-dir", "--compare-dest", "--copy-dest", "--link-dest",
        "--files-from", "--files-from-raw", "--include-from", "--exclude-from",
        "--filter-from", "--config",
    }
    read_actions = {"ls", "lsl", "lsd", "lsf", "cat", "size", "about", "check"}
    write_actions = {"copy", "copyto", "sync", "move", "moveto"}
    destructive_actions = {"delete", "deletefile", "purge", "mkdir", "rmdir", "touch"}

    action: str | None = None
    positional: list[str] = []
    path_targets: list[BashPathTarget] = []
    requires_manual_approval = False

    i = 0
    while i < len(args):
        arg = args[i]
        if not arg:
            i += 1
            continue
        if action is None and not arg.startswith("-"):
            action = arg
            i += 1
            continue
        if arg.startswith("--"):
            flag, has_inline_value, inline_value = arg.partition("=")
            if flag in risky_path_flags:
                requires_manual_approval = True
                if has_inline_value:
                    path_targets.append(BashPathTarget(inline_value, "read"))
                elif i + 1 < len(args):
                    # Separate-argument form: the value is consumed as well.
                    path_targets.append(BashPathTarget(args[i + 1], "read"))
                    i += 1
            i += 1
            continue
        if arg.startswith("-") and arg != "-":
            i += 1
            continue
        positional.append(arg)
        i += 1

    def record_endpoint(endpoint: str, operation: str) -> bool:
        """Record ``endpoint`` as a local target; return False when none is produced."""
        target = self._remote_classifier.build_local_target(endpoint, operation)
        if target is None:
            return False
        path_targets.append(target)
        return True

    action = (action or "").lower()

    if action in read_actions:
        for endpoint in positional:
            if not record_endpoint(endpoint, "read"):
                requires_manual_approval = True
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="read",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=requires_manual_approval,
            manual_approval_reason=(
                None if not requires_manual_approval else
                "rclone read commands with remote endpoints require explicit approval."
            ),
        )

    if action in write_actions and len(positional) >= 2:
        for source in positional[:-1]:
            if not record_endpoint(source, "read"):
                requires_manual_approval = True
        if not record_endpoint(positional[-1], "write"):
            requires_manual_approval = True
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=requires_manual_approval,
            manual_approval_reason=(
                None if not requires_manual_approval else
                "rclone copy/sync commands with remote endpoints require explicit approval."
            ),
        )

    if action in destructive_actions and positional:
        # Approval is unconditional here, so the classifier result only
        # decides whether a local write target is recorded.
        record_endpoint(positional[-1], "write")
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "rclone destructive or remote-aware commands require explicit approval."
            ),
        )

    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=path_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "rclone command requires explicit approval when its subcommand or endpoints "
            "cannot be determined safely."
        ),
    )
1577
+
1578
def extract_cpio(
    self,
    base_command: str,
    args: list[str],
    analysis: BashAstAnalysis,
) -> BashCommandPathInfo:
    """
    Build path info for a ``cpio`` invocation.

    Recognised before a ``--`` separator: ``--file=``/``-F``/``-O``/``--file``
    (archive path), ``--directory=``/``-D``/``--directory`` (directory path),
    ``--list``, and single-dash option clusters, in which the letters
    ``o``/``i``/``p``/``t`` select create/extract/pass-through/list mode.
    All other arguments are positional.

    Outcomes, checked in this order:
      * list mode with an archive: a read, no manual approval;
      * extract or pass-through mode: a write, always manual approval;
      * create mode with an archive: a write, no manual approval;
      * anything else: a write requiring manual approval.
    """
    archive_path: str | None = None
    directory_path: str | None = None
    create_mode = extract_mode = pass_mode = list_mode = False
    options_ended = False
    positional: list[str] = []

    cursor = 0
    total = len(args)
    while cursor < total:
        token = args[cursor]
        if not token:
            cursor += 1
            continue
        # After "--", and for "-" or anything not starting with "-", the
        # token is positional.
        if options_ended or token == "-" or not token.startswith("-"):
            positional.append(token)
            cursor += 1
            continue
        if token == "--":
            options_ended = True
            cursor += 1
            continue
        if token.startswith("--file="):
            archive_path = token.split("=", 1)[1]
            cursor += 1
            continue
        if token == "--list":
            list_mode = True
            cursor += 1
            continue
        if token.startswith("--directory="):
            directory_path = token.split("=", 1)[1]
            cursor += 1
            continue
        has_value = cursor + 1 < total
        if has_value and token in {"-F", "-O", "--file"}:
            archive_path = args[cursor + 1]
            cursor += 2
            continue
        if has_value and token in {"-D", "--directory"}:
            directory_path = args[cursor + 1]
            cursor += 2
            continue
        # Only single-dash clusters contribute mode letters.
        if not token.startswith("--"):
            create_mode = create_mode or "o" in token
            extract_mode = extract_mode or "i" in token
            pass_mode = pass_mode or "p" in token
            list_mode = list_mode or "t" in token
        cursor += 1

    def collect(archive_operation: str, directory_operation: str) -> list[BashPathTarget]:
        """Return targets for whichever of archive/directory were supplied, archive first."""
        collected: list[BashPathTarget] = []
        if archive_path:
            collected.append(BashPathTarget(archive_path, archive_operation))
        if directory_path:
            collected.append(BashPathTarget(directory_path, directory_operation))
        return collected

    if list_mode and archive_path:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="read",
            path_targets=collect("read", "read"),
            analysis=analysis,
        )

    if extract_mode or pass_mode:
        unpack_targets = collect("read", "write")
        if pass_mode and positional:
            # In pass-through mode the last positional is recorded as written.
            unpack_targets.append(BashPathTarget(positional[-1], "write"))
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=unpack_targets,
            analysis=analysis,
            requires_manual_approval=True,
            manual_approval_reason=(
                "cpio extract/pass-through commands require explicit approval because "
                "they can materialize filesystem writes beyond explicit argv paths."
            ),
        )

    fallback_targets = collect("write" if create_mode else "read", "write")
    if create_mode and archive_path:
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type="write",
            path_targets=fallback_targets,
            analysis=analysis,
        )
    return self._path_info_factory.build(
        base_command=base_command,
        operation_type="write",
        path_targets=fallback_targets,
        analysis=analysis,
        requires_manual_approval=True,
        manual_approval_reason=(
            "cpio command requires explicit approval when its archive target "
            "or write behavior cannot be determined safely."
        ),
    )
1693
+
1694
+ @staticmethod
1695
+ def _extract_attached_short_option_value(arg: str, option_char: str) -> str | None:
1696
+ if not arg.startswith("-") or arg.startswith("--"):
1697
+ return None
1698
+ option_index = arg.find(option_char)
1699
+ if option_index < 0:
1700
+ return None
1701
+ attached_value = arg[option_index + 1:]
1702
+ return attached_value or None
1703
+
1704
+ @staticmethod
1705
+ def _derive_single_file_archive_output(
1706
+ *,
1707
+ input_path: str,
1708
+ suffix: str,
1709
+ decompress_mode: bool,
1710
+ ) -> str | None:
1711
+ if decompress_mode:
1712
+ if not input_path.endswith(suffix):
1713
+ return None
1714
+ return input_path[: -len(suffix)]
1715
+ return f"{input_path}{suffix}"
1716
+
1717
+
1718
class BashPathExtractorRegistry:
    """
    Registry mapping a base command to its path-extraction rule.

    Mirrors CC PATH_EXTRACTORS, but concentrates on high-frequency /
    high-risk commands first.
    """

    _READ_COMMANDS = {
        "cd", "ls", "find", "cat", "head", "tail", "sort", "uniq", "wc",
        "cut", "paste", "column", "tr", "file", "stat", "diff", "awk",
        "strings", "hexdump", "od", "base64", "nl", "grep", "rg", "git",
        "jq", "sha256sum", "sha1sum", "md5sum", "zipinfo", "zipnote",
    }
    _CREATE_COMMANDS = {"mkdir", "touch", "mktemp"}
    _WRITE_COMMANDS = {
        "rm", "rmdir", "mv", "cp", "sed", "tee", "install", "ln", "tar", "dd", "rsync", "cpio",
        "zip", "unzip", "7z", "7za", "7zr", "gzip", "gunzip", "xz", "unxz", "bzip2",
        "bunzip2", "zstd", "unzstd", "lz4", "jar", "bsdtar", "rar", "unrar",
        "scp", "sftp", "rclone",
    }

    # Write commands handled by a dedicated method of the special extractor.
    # Values are method names so the attribute is only looked up when needed.
    _SPECIAL_WRITE_EXTRACTORS = {
        "tee": "extract_tee",
        "install": "extract_install",
        "ln": "extract_ln",
        "tar": "extract_tar",
        "bsdtar": "extract_tar",
        "dd": "extract_dd",
        "rsync": "extract_rsync",
        "zip": "extract_zip",
        "unzip": "extract_unzip",
        "7z": "extract_7z_family",
        "7za": "extract_7z_family",
        "7zr": "extract_7z_family",
        "gzip": "extract_single_file_archive",
        "gunzip": "extract_single_file_archive",
        "xz": "extract_single_file_archive",
        "unxz": "extract_single_file_archive",
        "bzip2": "extract_single_file_archive",
        "bunzip2": "extract_single_file_archive",
        "zstd": "extract_single_file_archive",
        "unzstd": "extract_single_file_archive",
        "lz4": "extract_single_file_archive",
        "jar": "extract_jar",
        "rar": "extract_rar_family",
        "unrar": "extract_rar_family",
        "scp": "extract_scp",
        "sftp": "extract_sftp",
        "rclone": "extract_rclone",
        "cpio": "extract_cpio",
    }

    def __init__(
        self,
        tokenizer: BashArgumentTokenizer | None = None,
        wrapper_stripper: BashWrapperStripper | None = None,
    ) -> None:
        """Wire up collaborators, creating default instances for any not supplied."""
        self._tokenizer = tokenizer or BashArgumentTokenizer()
        self._wrapper_stripper = wrapper_stripper or BashWrapperStripper()
        self._path_info_factory = BashCommandPathInfoFactory()
        self._remote_classifier = BashRemotePathClassifier()
        self._special_extractor = BashSpecialPathExtractor(
            tokenizer=self._tokenizer,
            path_info_factory=self._path_info_factory,
            remote_classifier=self._remote_classifier,
        )

    def extract(self, command_text: str, analysis: BashAstAnalysis) -> BashCommandPathInfo | None:
        """
        Extract path information for ``command_text``.

        The first normalized base command selects the read / create / write
        rule set. Commands that are unknown, or for which no argv can be
        produced, are handled as redirection-only, which yields ``None``
        when the analysis has no output redirection paths.
        """
        normalized = analysis.normalized_base_commands
        if not normalized:
            return self._extract_redirection_only(analysis)

        base_command = normalized[0]
        tokens = self._get_effective_argv(command_text, analysis)
        if not tokens:
            return self._extract_redirection_only(analysis)

        args = tokens[1:]
        if base_command in self._READ_COMMANDS:
            return self._extract_read_command(base_command, args, analysis)
        if base_command in self._CREATE_COMMANDS:
            if base_command == "mktemp":
                return self._special_extractor.extract_mktemp(base_command, args, analysis)
            return self._extract_simple_command(base_command, args, "create", analysis)
        if base_command in self._WRITE_COMMANDS:
            return self._extract_write_command(base_command, args, analysis)

        return self._extract_redirection_only(analysis)

    def _get_effective_argv(self, command_text: str, analysis: BashAstAnalysis) -> list[str]:
        """
        Return the argv of the command after wrapper stripping.

        The argv from the analysis is preferred when it describes exactly one
        simple command whose text equals the stripped command text; otherwise
        the command text is re-tokenized.
        """
        if len(analysis.simple_commands) == 1:
            simple = analysis.simple_commands[0]
            if simple.text == command_text.strip() and simple.argv:
                stripped = self._wrapper_stripper.strip(list(simple.argv))
                if stripped:
                    return stripped
        return self._wrapper_stripper.strip(self._tokenizer.split(command_text))

    def _extract_redirection_only(self, analysis: BashAstAnalysis) -> BashCommandPathInfo | None:
        """Return write info for a bare redirection, or ``None`` when nothing is redirected."""
        if not analysis.output_redirection_paths:
            return None
        return self._build_path_info("<redirect>", "write", [], analysis)

    def _extract_simple_command(
        self,
        base_command: str,
        args: list[str],
        operation_type: OperationType,
        analysis: BashAstAnalysis,
    ) -> BashCommandPathInfo:
        """Treat every non-flag argument as a path with the given operation type."""
        paths = self._tokenizer.filter_out_flags(args)
        path_targets = [BashPathTarget(path, operation_type) for path in paths]
        return self._build_path_info(base_command, operation_type, path_targets, analysis)

    def _extract_write_command(
        self,
        base_command: str,
        args: list[str],
        analysis: BashAstAnalysis,
    ) -> BashCommandPathInfo:
        """
        Extract path info for a write-class command.

        ``mv``/``cp`` with any flag are sent to manual approval without path
        targets; ``sed`` uses its own argument parser; commands listed in
        ``_SPECIAL_WRITE_EXTRACTORS`` are delegated to the special extractor;
        everything else treats each non-flag argument as a written path.
        """
        if base_command in {"mv", "cp"} and any(arg.startswith("-") for arg in args):
            return BashCommandPathInfo(
                base_command=base_command,
                operation_type="write",
                path_targets=(),
                requires_manual_approval=True,
                manual_approval_reason=(
                    f"{base_command} with flags requires manual approval to ensure path safety."
                ),
            )

        if base_command == "sed":
            paths = self._extract_sed_paths(args)
        else:
            extractor_name = self._SPECIAL_WRITE_EXTRACTORS.get(base_command)
            if extractor_name is not None:
                extractor = getattr(self._special_extractor, extractor_name)
                return extractor(base_command, args, analysis)
            paths = self._tokenizer.filter_out_flags(args)

        path_targets = [BashPathTarget(path, "write") for path in paths]
        return self._build_path_info(base_command, "write", path_targets, analysis)

    def _extract_read_command(
        self,
        base_command: str,
        args: list[str],
        analysis: BashAstAnalysis,
    ) -> BashCommandPathInfo:
        """Extract path info for a read-class command using its per-command rule."""
        if base_command in {"zipinfo", "zipnote"}:
            return self._special_extractor.extract_zip_metadata(base_command, args, analysis)
        extracted_paths: list[str]
        if base_command == "cd":
            # No argument means the home directory; otherwise all arguments
            # are joined into a single path.
            extracted_paths = ["~"] if not args else [" ".join(args)]
        elif base_command == "ls":
            extracted_paths = self._tokenizer.filter_out_flags(args) or ["."]
        elif base_command == "find":
            extracted_paths = self._extract_find_paths(args)
        elif base_command == "grep":
            paths = self._tokenizer.parse_pattern_command(
                args,
                flags_with_args={
                    "-e", "--regexp", "-f", "--file", "--exclude", "--include",
                    "--exclude-dir", "--include-dir", "-m", "--max-count",
                    "-A", "--after-context", "-B", "--before-context", "-C", "--context",
                },
            )
            # A recursive grep without explicit paths searches the current directory.
            if not paths and any(flag in {"-r", "-R", "--recursive"} for flag in args):
                paths = ["."]
            extracted_paths = paths
        elif base_command == "rg":
            extracted_paths = self._tokenizer.parse_pattern_command(
                args,
                flags_with_args={
                    "-e", "--regexp", "-f", "--file", "-t", "--type",
                    "-T", "--type-not", "-g", "--glob", "-m", "--max-count",
                    "--max-depth", "-r", "--replace", "-A", "--after-context",
                    "-B", "--before-context", "-C", "--context",
                },
                defaults=["."],
            )
        elif base_command == "jq":
            extracted_paths = self._extract_jq_paths(args)
        elif base_command == "git":
            extracted_paths = self._extract_git_paths(args)
        elif base_command == "tr":
            extracted_paths = self._extract_tr_paths(args)
        elif base_command == "awk":
            extracted_paths = self._extract_awk_paths(args)
        else:
            extracted_paths = self._tokenizer.filter_out_flags(args)

        path_targets = [BashPathTarget(path, "read") for path in extracted_paths]
        return self._build_path_info(base_command, "read", path_targets, analysis)

    def _build_path_info(
        self,
        base_command: str,
        operation_type: OperationType,
        path_targets: list[BashPathTarget],
        analysis: BashAstAnalysis,
        *,
        requires_manual_approval: bool = False,
        manual_approval_reason: str | None = None,
    ) -> BashCommandPathInfo:
        """Forward to the path-info factory with explicit keyword arguments."""
        return self._path_info_factory.build(
            base_command=base_command,
            operation_type=operation_type,
            path_targets=path_targets,
            analysis=analysis,
            requires_manual_approval=requires_manual_approval,
            manual_approval_reason=manual_approval_reason,
        )

    def _extract_find_paths(self, args: list[str]) -> list[str]:
        """
        Return the paths a ``find`` invocation refers to.

        Arguments before the first non-global option are search roots, the
        values of the options in ``path_flags`` are collected too, and
        everything after ``--`` is taken verbatim. Defaults to ``["."]``.
        """
        paths: list[str] = []
        path_flags = {
            "-newer", "-anewer", "-cnewer", "-mnewer", "-samefile",
            "-path", "-wholename", "-ilname", "-lname", "-ipath", "-iwholename",
        }
        found_non_global_flag = False
        after_double_dash = False

        i = 0
        while i < len(args):
            arg = args[i]
            if not arg:
                i += 1
                continue
            if after_double_dash:
                paths.append(arg)
                i += 1
                continue
            if arg == "--":
                after_double_dash = True
                i += 1
                continue
            if arg.startswith("-"):
                # -H/-L/-P are global options and do not end the root list.
                if arg not in {"-H", "-L", "-P"}:
                    found_non_global_flag = True
                    if arg in path_flags and i + 1 < len(args):
                        paths.append(args[i + 1])
                        i += 1
                i += 1
                continue
            if not found_non_global_flag:
                paths.append(arg)
            i += 1

        return paths or ["."]

    def _extract_jq_paths(self, args: list[str]) -> list[str]:
        """
        Return the files a ``jq`` invocation reads.

        Follows the option shapes in the jq manual:
          * ``--slurpfile NAME FILE`` / ``--rawfile NAME FILE`` take two
            operands and read ``FILE``, which is recorded;
          * ``--arg NAME VALUE`` / ``--argjson NAME JSON`` take two operands,
            neither of which is a file;
          * ``-f`` / ``--from-file`` means the program comes from a file, so
            no positional is a literal filter and every positional (program
            file included) is recorded;
          * ``-e`` (``--exit-status``) and ``--tab`` take no operand, so the
            argument after them is not swallowed.

        Without ``-f`` the first positional is the filter and the remaining
        positionals are input files.
        """
        paths: list[str] = []
        single_value_flags = {"--args", "--jsonargs", "-L", "--library-path", "--indent"}
        name_value_flags = {"--arg", "--argjson"}
        name_file_flags = {"--slurpfile", "--rawfile"}
        program_file_flags = {"-f", "--from-file"}
        filter_found = False
        after_double_dash = False
        i = 0
        while i < len(args):
            arg = args[i]
            # Empty strings are deliberately NOT skipped: an empty argument
            # can itself be the filter, and skipping it would shift an input
            # file into the filter slot.
            if arg is None:
                i += 1
                continue
            if not after_double_dash and arg == "--":
                after_double_dash = True
                i += 1
                continue
            if not after_double_dash and arg.startswith("-"):
                flag, has_inline_value, inline_value = arg.partition("=")
                is_short_cluster = not arg.startswith("--")
                if flag in program_file_flags or (is_short_cluster and "f" in flag[1:]):
                    filter_found = True
                    if has_inline_value and inline_value:
                        paths.append(inline_value)
                    i += 1
                    continue
                if not has_inline_value:
                    if flag in name_file_flags:
                        if i + 2 < len(args):
                            paths.append(args[i + 2])
                        i += 3
                        continue
                    if flag in name_value_flags:
                        i += 3
                        continue
                    if flag in single_value_flags and i + 1 < len(args):
                        i += 2
                        continue
                i += 1
                continue
            if not filter_found:
                filter_found = True
            else:
                paths.append(arg)
            i += 1
        return paths

    def _extract_git_paths(self, args: list[str]) -> list[str]:
        """Return the two files compared by ``git diff --no-index``; nothing otherwise."""
        if len(args) >= 1 and args[0] == "diff" and "--no-index" in args:
            file_paths = self._tokenizer.filter_out_flags(args[1:])
            return file_paths[:2]
        return []

    def _extract_awk_paths(self, args: list[str]) -> list[str]:
        """
        Return the files an ``awk`` invocation reads.

        A script file given via ``-f``/``--file`` is included; the values of
        ``-v`` and ``-F`` are skipped; the first positional is the inline
        program unless a script file was given.
        """
        paths: list[str] = []
        after_double_dash = False
        script_consumed = False
        i = 0
        while i < len(args):
            arg = args[i]
            if not arg:
                i += 1
                continue
            if not after_double_dash and arg == "--":
                after_double_dash = True
                i += 1
                continue
            if not after_double_dash and arg.startswith("-"):
                if arg in {"-f", "--file"} and i + 1 < len(args):
                    paths.append(args[i + 1])  # the awk script file itself must be validated
                    script_consumed = True
                    i += 2
                    continue
                if arg in {"-v", "-F"} and i + 1 < len(args):
                    i += 2
                    continue
                i += 1
                continue
            if not script_consumed:
                script_consumed = True
                i += 1
                continue
            paths.append(arg)
            i += 1
        return paths

    def _extract_tr_paths(self, args: list[str]) -> list[str]:
        """
        Return the non-flag arguments of ``tr`` that follow its character sets.

        One set is skipped when any dash-prefixed argument contains ``d``
        (covers ``-d``, ``--delete`` and clusters such as ``-cd``); otherwise
        two sets are skipped.
        """
        has_delete = any(arg.startswith("-") and "d" in arg for arg in args)
        non_flags = self._tokenizer.filter_out_flags(args)
        return non_flags[1:] if has_delete else non_flags[2:]

    def _extract_sed_paths(self, args: list[str]) -> list[str]:
        """
        Return the files a ``sed`` invocation touches.

        The argument after ``-f``/``--file`` is recorded as a script file,
        the argument after ``-e``/``--expression`` is skipped, and any other
        dash-prefixed argument containing ``e`` or ``f`` marks the script as
        already supplied. Unless a script was supplied via an option, the
        first positional is the script; remaining positionals are files.
        """
        paths: list[str] = []
        script_found = False
        after_double_dash = False

        remaining = iter(args)
        for arg in remaining:
            if not arg:
                continue
            if not after_double_dash and arg == "--":
                after_double_dash = True
                continue
            if not after_double_dash and arg.startswith("-"):
                if arg in {"-f", "--file"}:
                    script_found = True
                    script_file = next(remaining, None)
                    if script_file is not None:
                        paths.append(script_file)
                elif arg in {"-e", "--expression"}:
                    script_found = True
                    next(remaining, None)  # skip the inline expression
                elif "e" in arg or "f" in arg:
                    script_found = True
                continue
            if not script_found:
                script_found = True
                continue
            paths.append(arg)

        return paths
2105
+
2106
+
2107
class BashPathSyntaxGuard:
    """Detect path syntax that cannot be resolved safely by static analysis."""

    _PERCENT_ENV_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")

    # Characters that, directly after "$", name a positional or special shell
    # parameter ($0-$9, $@, $*, $#, $?, $$, $!, $-). A frozenset is used so
    # that the empty "no next character" string is never a member.
    _SPECIAL_PARAMETER_CHARS = frozenset("0123456789@*#?$!-")

    def get_manual_approval_reason(self, path_info: BashCommandPathInfo) -> str | None:
        """
        Return why ``path_info`` needs manual approval, or ``None`` if it does not.

        The first path target whose raw path contains shell expansion syntax
        decides the message; redirection targets get their own wording.
        """
        for target in path_info.path_targets:
            if not self._contains_shell_expansion(target.raw_path):
                continue
            if target.target_source == "redirection":
                return (
                    "Shell expansion syntax in redirection targets requires explicit approval "
                    "because the write path cannot be determined safely."
                )
            return (
                "Shell expansion syntax in filesystem path arguments requires explicit approval "
                "because the target path cannot be determined safely."
            )
        return None

    def _contains_shell_expansion(self, raw_path: str) -> bool:
        """Return True when the stripped path contains any expansion syntax checked here."""
        stripped = raw_path.strip()
        if not stripped:
            return False

        scanner = BashPathLexicalScanner(stripped)
        checks = (
            self._contains_parameter_like_expansion,
            self._contains_unquoted_dynamic_tilde,
            self._contains_unquoted_extglob,
            self._contains_unquoted_glob,
            self._contains_unquoted_brace_expansion,
        )
        return any(check(scanner) for check in checks)

    def _contains_parameter_like_expansion(self, scanner: "BashPathLexicalScanner") -> bool:
        """
        Detect command substitution and parameter-style expansion.

        Characters that are single-quoted or escaped are ignored. Flagged:
          * a backtick;
          * ``$`` followed by ``(``, ``{``, a letter or ``_``;
          * ``$`` followed by a digit or special-parameter character
            (``$1``, ``$$``, ``$?`` ...), which the shell also replaces;
          * an unquoted ``$'`` (ANSI-C quoting), whose backslash escapes the
            shell decodes before the command sees the path;
          * ``%NAME%`` outside double quotes, where ``NAME`` is an identifier
            with no quoted character inside.
        """
        text = scanner.text
        for index, char in enumerate(text):
            if scanner.is_single(index) or scanner.is_escaped(index):
                continue
            if char == "`":
                return True
            if char == "$":
                next_char = text[index + 1] if index + 1 < len(text) else ""
                if next_char in {"(", "{"}:
                    return True
                if next_char.isalpha() or next_char == "_":
                    return True
                if next_char in self._SPECIAL_PARAMETER_CHARS:
                    return True
                # Inside double quotes "$'" is a literal dollar sign.
                if next_char == "'" and scanner.is_unquoted(index):
                    return True
            if char == "%" and not scanner.is_double(index):
                end_index = text.find("%", index + 1)
                if end_index > index + 1:
                    if any(
                        scanner.is_single(inner_idx) or scanner.is_double(inner_idx)
                        for inner_idx in range(index + 1, end_index)
                    ):
                        continue
                    candidate = text[index + 1:end_index]
                    if self._PERCENT_ENV_RE.fullmatch(candidate):
                        return True
        return False

    @staticmethod
    def _digits_end_at_path_boundary(fragment: str) -> bool:
        """Return True if ``fragment``, after leading digits, is empty or starts with ``/``."""
        position = 0
        while position < len(fragment) and fragment[position].isdigit():
            position += 1
        return position == len(fragment) or fragment[position] == "/"

    def _contains_unquoted_dynamic_tilde(self, scanner: "BashPathLexicalScanner") -> bool:
        """
        Detect a leading unquoted ``~+``, ``~-``, ``~N``, ``~+N`` or ``~-N`` prefix.

        The prefix only counts when it is the whole text or is followed by ``/``.
        """
        text = scanner.text
        if (
            not text
            or not scanner.is_unquoted(0)
            or scanner.is_escaped(0)
            or text[0] != "~"
        ):
            return False
        next_char = text[1] if len(text) > 1 else ""
        if next_char in {"+", "-"}:
            return self._digits_end_at_path_boundary(text[2:])
        if next_char.isdigit():
            return self._digits_end_at_path_boundary(text[1:])
        return False

    def _contains_unquoted_extglob(self, scanner: "BashPathLexicalScanner") -> bool:
        """Detect an unquoted, unescaped extglob opener: one of ``@!+?*`` followed by ``(``."""
        text = scanner.text
        for index in range(len(text) - 1):
            if not scanner.is_unquoted(index) or scanner.is_escaped(index):
                continue
            if (
                text[index] in {"@", "!", "+", "?", "*"}
                and text[index + 1] == "("
                and scanner.is_unquoted(index + 1)
                and not scanner.is_escaped(index + 1)
            ):
                return True
        return False

    def _contains_unquoted_glob(self, scanner: "BashPathLexicalScanner") -> bool:
        """
        Detect an unquoted, unescaped ``*``, ``?`` or ``[...]`` glob.

        ``*``/``?`` directly followed by ``(`` are skipped here; that shape is
        covered by the extglob check.
        """
        text = scanner.text
        for index, char in enumerate(text):
            if not scanner.is_unquoted(index) or scanner.is_escaped(index):
                continue
            if char in {"*", "?"}:
                if index + 1 < len(text) and text[index + 1] == "(":
                    continue
                return True
            if char == "[" and scanner.has_unquoted_closing_bracket(index + 1):
                return True
        return False

    def _contains_unquoted_brace_expansion(self, scanner: "BashPathLexicalScanner") -> bool:
        """Detect an unquoted ``{...}`` group whose content contains ``,`` or ``..``."""
        text = scanner.text
        index = 0
        while index < len(text):
            if (
                text[index] != "{"
                or not scanner.is_unquoted(index)
                or scanner.is_escaped(index)
            ):
                index += 1
                continue
            end_index = scanner.find_matching_unquoted_brace(index + 1)
            if end_index == -1:
                # Unbalanced opener: move on to the next character.
                index += 1
                continue
            content = text[index + 1:end_index]
            if "," in content or ".." in content:
                return True
            index = end_index + 1
        return False
2244
+
2245
+
2246
class BashPathLexicalScanner:
    """Scan a path fragment according to shell quoting / escaping rules."""

    def __init__(self, text: str) -> None:
        self.text = text
        # Per-character quote state: "unquoted", "single" or "double".
        self._states: list[str] = []
        # Per-character flag: True when the preceding character was an
        # active backslash.
        self._escaped: list[bool] = []
        self._scan()

    def is_unquoted(self, index: int) -> bool:
        """Return True when the character at ``index`` is outside any quotes."""
        return self._states[index] == "unquoted"

    def is_single(self, index: int) -> bool:
        """Return True when the character at ``index`` is inside single quotes."""
        return self._states[index] == "single"

    def is_double(self, index: int) -> bool:
        """Return True when the character at ``index`` is inside double quotes."""
        return self._states[index] == "double"

    def is_escaped(self, index: int) -> bool:
        """Return True when the character at ``index`` follows an active backslash."""
        return self._escaped[index]

    def _is_live(self, index: int) -> bool:
        """Return True when the character is both unquoted and unescaped."""
        return self.is_unquoted(index) and not self.is_escaped(index)

    def has_unquoted_closing_bracket(self, start_idx: int) -> bool:
        """Return True when an active ``]`` exists at or after ``start_idx``."""
        return any(
            self.text[pos] == "]" and self._is_live(pos)
            for pos in range(start_idx, len(self.text))
        )

    def find_matching_unquoted_brace(self, start_idx: int) -> int:
        """Return the index of the ``}`` closing a brace opened before ``start_idx``.

        Only active (unquoted, unescaped) braces are counted; nested pairs
        are skipped. Returns -1 when no matching brace is found.
        """
        nesting = 0
        for pos in range(start_idx, len(self.text)):
            if not self._is_live(pos):
                continue
            symbol = self.text[pos]
            if symbol == "}":
                if not nesting:
                    return pos
                nesting -= 1
            elif symbol == "{":
                nesting += 1
        return -1

    def _scan(self) -> None:
        """Fill ``_states`` and ``_escaped`` with one entry per character.

        The state recorded for a character is the one in effect *before* the
        character is processed, so an opening quote keeps the outer state
        and a closing quote keeps the inner one.
        """
        labels = {"": "unquoted", "'": "single", '"': "double"}
        quote = ""  # currently open quote character, or "" when none
        pending_escape = False
        for symbol in self.text:
            self._states.append(labels[quote])
            self._escaped.append(pending_escape)

            if pending_escape:
                # The escaped character is consumed literally.
                pending_escape = False
            elif symbol == "\\" and quote != "'":
                # Backslash is literal inside single quotes, active elsewhere.
                pending_escape = True
            elif symbol == "'" and quote != '"':
                quote = "" if quote == "'" else "'"
            elif symbol == '"' and quote != "'":
                quote = "" if quote == '"' else '"'
2308
+
2309
+
2310
class BashDangerousPathChecker:
    """Check removal commands for targets that are critical system paths."""

    _CRITICAL_PATHS = frozenset({
        "/", "/etc", "/usr", "/bin", "/sbin", "/var", "/home", "/root",
        "/boot", "/dev", "/proc", "/sys",
    })

    def check(self, path_info: BashCommandPathInfo, cwd: str) -> AuthorizationDecision | None:
        """Return an "ask" decision when ``rm`` / ``rmdir`` targets a critical path.

        Args:
            path_info: Extracted command info; ``base_command`` and
                ``path_targets`` (each with a ``raw_path``) are read.
            cwd: Directory against which relative targets are resolved.

        Returns:
            An ``AuthorizationDecision`` with ``behavior="ask"`` for the first
            critical target found, or ``None`` when the command is not a
            removal or no target is critical.
        """
        if path_info.base_command not in {"rm", "rmdir"}:
            return None

        for target in path_info.path_targets:
            critical = self._find_critical_path(target.raw_path, cwd)
            if critical is None:
                continue
            return AuthorizationDecision(
                behavior="ask",
                message=(
                    f"Dangerous {path_info.base_command} operation detected on critical path: "
                    f"{critical}"
                ),
                policy_id="dangerous_removal_path",
                ask_prompt=(
                    f"Dangerous removal target detected: {critical}\n"
                    "This requires explicit approval."
                ),
            )
        return None

    @classmethod
    def _find_critical_path(cls, raw_path: str, cwd: str) -> str | None:
        """Return the critical path that ``raw_path`` refers to, if any."""
        for candidate in cls._candidate_paths(raw_path, cwd):
            if candidate in cls._CRITICAL_PATHS:
                return candidate
        return None

    @staticmethod
    def _candidate_paths(raw_path: str, cwd: str) -> tuple[str, str]:
        """Return ``(symlink-resolved, lexically normalized)`` absolute paths.

        Both forms are needed: when a critical path such as ``/bin`` is
        itself a symlink (as on usr-merged systems), the resolved form
        (``/usr/bin``) no longer matches the critical set, yet removing the
        symlink is just as destructive.
        """
        expanded = os.path.expanduser(raw_path.strip("\"'"))
        absolute = expanded if os.path.isabs(expanded) else os.path.join(cwd, expanded)
        lexical = os.path.normpath(absolute)
        if lexical.startswith("//"):
            # POSIX normpath keeps exactly two leading slashes; collapse them
            # so "//bin" compares equal to "/bin".
            lexical = "/" + lexical.lstrip("/")
        return os.path.realpath(absolute), lexical

    @staticmethod
    def _resolve_path(raw_path: str, cwd: str) -> str:
        """Strip surrounding quotes, expand ``~`` and resolve to a real absolute path."""
        expanded = os.path.expanduser(raw_path.strip("\"'"))
        absolute = expanded if os.path.isabs(expanded) else os.path.join(cwd, expanded)
        return os.path.realpath(absolute)
2344
+
2345
+
2346
class BashPathSecurityAnalyzer:
    """
    Bash path security analyzer.

    Maps "command string + AST structure summary" to a path authorization
    decision.
    """

    def __init__(
        self,
        extractor_registry: BashPathExtractorRegistry | None = None,
        dangerous_path_checker: BashDangerousPathChecker | None = None,
        path_syntax_guard: BashPathSyntaxGuard | None = None,
    ) -> None:
        # Each collaborator falls back to a default instance when omitted.
        if not extractor_registry:
            extractor_registry = BashPathExtractorRegistry()
        if not dangerous_path_checker:
            dangerous_path_checker = BashDangerousPathChecker()
        if not path_syntax_guard:
            path_syntax_guard = BashPathSyntaxGuard()
        self._extractor_registry = extractor_registry
        self._dangerous_path_checker = dangerous_path_checker
        self._path_syntax_guard = path_syntax_guard

    def authorize_segment(
        self,
        *,
        command: str,
        analysis: BashAstAnalysis,
        cwd: str,
        policy: object | None,
        compound_has_cd: bool,
    ) -> AuthorizationDecision:
        """Decide whether one command segment may touch the paths it names.

        Checks run in this order, and the first non-passing one decides:
        path extraction, cd-plus-write compound, path syntax guard,
        dangerous removal target, per-path policy, manual-approval flag.
        """
        path_info = self._extractor_registry.extract(command, analysis)
        if path_info is None:
            # No path information was extracted, so nothing to authorize.
            return AuthorizationDecision(behavior="allow")

        if compound_has_cd and path_info.operation_type != "read":
            return self._ask(
                "Commands that change directories and perform write operations require "
                "explicit approval to ensure paths are evaluated correctly.",
                "compound_cd_write",
            )

        syntax_reason = self._path_syntax_guard.get_manual_approval_reason(path_info)
        if syntax_reason is not None:
            return self._ask(syntax_reason, "bash_path_validator")

        dangerous_decision = self._dangerous_path_checker.check(path_info, cwd)
        if dangerous_decision is not None:
            return dangerous_decision

        authorize_path = getattr(policy, "authorize_path", None)
        if authorize_path is not None:
            for target in path_info.path_targets:
                verdict = authorize_path(
                    self._resolve_path(target.raw_path, cwd),
                    is_write=target.operation_type in {"write", "create"},
                )
                if verdict.behavior != "allow":
                    return verdict

            # NOTE(review): this flag is only consulted when the policy
            # exposes authorize_path; without one the segment is allowed
            # outright. Confirm that this is intended.
            if path_info.requires_manual_approval:
                return self._ask(
                    path_info.manual_approval_reason or "Command requires manual approval.",
                    "bash_path_validator",
                )

        return AuthorizationDecision(behavior="allow")

    @staticmethod
    def _ask(reason: str, policy_id: str) -> AuthorizationDecision:
        """Build an "ask" decision that uses ``reason`` as message and prompt."""
        return AuthorizationDecision(
            behavior="ask",
            message=reason,
            policy_id=policy_id,
            ask_prompt=reason,
        )

    @staticmethod
    def _resolve_path(raw_path: str, cwd: str) -> str:
        """Strip surrounding quotes, expand ``~`` and resolve to a real absolute path."""
        candidate = os.path.expanduser(raw_path.strip("\"'"))
        if not os.path.isabs(candidate):
            candidate = os.path.join(cwd, candidate)
        return os.path.realpath(candidate)