decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
utils/reset_helpers.py ADDED
@@ -0,0 +1,318 @@
1
+ import asyncio
2
+ import subprocess
3
+ import urllib.request
4
+ import urllib.error
5
+ from pathlib import Path
6
+ from typing import Dict, Any, Optional
7
+
8
+ from .template_helpers import render_template
9
+
10
+
11
# Whether docker commands built in this module are prefixed with "sudo".
# NOTE: this is a hard-coded constant; no runtime check is performed here.
_USE_SUDO = False
13
+
14
+
15
def _send_reset_request(url: str, method: str, timeout: int) -> int:
    """Send one empty-bodied reset request and return the HTTP status code.

    This helper blocks; ``reset_via_endpoints`` runs it in an executor thread
    so the event loop stays responsive while the request is in flight.
    """
    req = urllib.request.Request(url, method=method, data=b"")
    req.add_header("Content-Type", "application/json")
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.status


async def reset_via_endpoints(
    env_name: str,
    ports: Dict[str, int],
    env_config: Dict[str, Any],
    timeout: int = 30,
    max_retries: int = 10,
    retry_delay: float = 2.0,
) -> None:
    """
    Reset an environment via HTTP API endpoints.

    Every entry of ``environments.<env_name>.reset_endpoints`` is called in
    turn. An endpoint whose URL still contains ``${`` after template
    rendering is skipped (with a log line) rather than treated as an error.

    Args:
        env_name: Name of the environment (e.g., "salesforce", "gmail")
        ports: Dict mapping port variable names to allocated port numbers
        env_config: Full env.yaml config dict
        timeout: HTTP request timeout in seconds
        max_retries: Maximum number of retry attempts per endpoint
        retry_delay: Delay between retries in seconds

    Raises:
        RuntimeError: If no endpoints are configured, or if any endpoint call
            fails after all retries (HTTP error status, URL/connection error,
            socket timeout, or malformed HTTP response).
    """
    # Local import: only needed for the HTTPException handler below.
    import http.client

    environments = env_config.get("environments", {})
    env_def = environments.get(env_name, {})
    reset_endpoints = env_def.get("reset_endpoints", {})

    if not reset_endpoints:
        raise RuntimeError(f"No reset endpoints configured for {env_name}")

    print(f"[RESET] Resetting {env_name} via API endpoints", flush=True)

    loop = asyncio.get_running_loop()

    for endpoint_name, endpoint_config in reset_endpoints.items():
        url_template = endpoint_config.get("url", "")
        method = endpoint_config.get("method", "POST").upper()

        # Resolve ${VAR_NAME} placeholders using ports
        url = render_template(url_template, ports)

        # Check if URL still has unresolved placeholders
        if "${" in url:
            print(f"[RESET] Skipping {endpoint_name}: unresolved placeholders in URL", flush=True)
            continue

        for attempt in range(1, max_retries + 1):
            try:
                # urlopen blocks, so run it in the default executor instead of
                # stalling every other coroutine on this loop.
                status = await loop.run_in_executor(
                    None, _send_reset_request, url, method, timeout
                )
            except urllib.error.HTTPError as exc:
                # HTTPError subclasses URLError, so it must be handled first.
                cause = exc
                error_text = f"Reset endpoint '{endpoint_name}' failed: HTTP {exc.code}"
                retry_text = f"{endpoint_name} failed with HTTP {exc.code}"
            except urllib.error.URLError as exc:
                cause = exc
                error_text = f"Reset endpoint '{endpoint_name}' failed: {exc.reason}"
                retry_text = f"{endpoint_name} failed: {exc.reason}"
            except (OSError, http.client.HTTPException) as exc:
                # Read timeouts, connection resets and malformed responses are
                # not wrapped in URLError by urllib; treat them as retryable
                # failures too so callers only ever see RuntimeError.
                cause = exc
                error_text = f"Reset endpoint '{endpoint_name}' failed: {exc}"
                retry_text = f"{endpoint_name} failed: {exc}"
            else:
                if 200 <= status < 300:
                    print(f"[RESET] {env_name}/{endpoint_name}: OK (HTTP {status})", flush=True)
                    break
                cause = None
                error_text = f"Reset endpoint '{endpoint_name}' returned HTTP {status}"
                retry_text = f"{endpoint_name} returned HTTP {status}"

            # Reaching this point means the attempt failed.
            if attempt >= max_retries:
                raise RuntimeError(error_text) from cause
            print(f"[RESET] {retry_text}, retrying ({attempt}/{max_retries})...", flush=True)
            await asyncio.sleep(retry_delay)
92
+
93
+
94
async def reset_via_scripts(
    env_name: str,
    project_name: str,
    compose_file: Path,
    env_config: Dict[str, Any],
    timeout: int = 30,
) -> None:
    """
    Reset an environment via docker exec scripts.

    For every ``service -> script`` entry of
    ``environments.<env_name>.reset_scripts`` this runs
    ``docker compose -p <project> -f <compose_file> exec -T <service> /bin/sh -c <script>``
    from the compose file's directory, one service after another.

    Args:
        env_name: Name of the environment (e.g., "salesforce", "gmail")
        project_name: Docker compose project name
        compose_file: Path to docker-compose.yml file
        env_config: Full env.yaml config dict
        timeout: Script execution timeout in seconds (applied per service)

    Raises:
        RuntimeError: If no scripts are configured, a script times out, or a
            script exits with a non-zero status
    """
    environments = env_config.get("environments", {})
    env_def = environments.get(env_name, {})
    reset_scripts = env_def.get("reset_scripts", {})

    if not reset_scripts:
        raise RuntimeError(f"No reset scripts configured for {env_name}")

    print(f"[RESET] Resetting {env_name} via scripts", flush=True)

    for service, script_path in reset_scripts.items():
        cmd = [
            "docker", "compose", "-p", project_name,
            "-f", str(compose_file), "exec", "-T", service,
            "/bin/sh", "-c", script_path,
        ]
        if _USE_SUDO:
            cmd = ["sudo"] + cmd

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=str(compose_file.parent),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            _stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        except asyncio.TimeoutError as exc:
            # wait_for() only cancels communicate(); the child keeps running.
            # Kill and reap it so timed-out resets do not pile up as stray
            # processes. (This stops the local docker client; whether the
            # script inside the container also stops is up to docker.)
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # already exited between the timeout and the kill
            await proc.wait()
            raise RuntimeError(f"Reset script for {service} timed out") from exc

        if proc.returncode != 0:
            # errors="replace": undecodable stderr bytes must not mask the failure.
            raise RuntimeError(
                f"Reset script for {service} failed: {stderr.decode(errors='replace')}"
            )

        print(f"[RESET] {env_name}/{service}: OK (via script)", flush=True)
147
+
148
+
149
async def reset_environment(
    env_name: str,
    ports: Dict[str, int],
    env_config: Dict[str, Any],
    project_name: Optional[str] = None,
    compose_file: Optional[Path] = None,
    endpoint_timeout: int = 30,
    script_timeout: int = 30,
    max_retries: int = 10,
) -> None:
    """
    Bring one environment's data back to its initial state.

    Strategy:
        1. If ``reset_endpoints`` are configured, call them first.
        2. If that raises ``RuntimeError`` and ``reset_scripts`` are configured
           (and project info was supplied), fall back to the scripts.
        3. If only ``reset_scripts`` are configured, run them directly.
    An environment with neither kind of reset configured is a silent no-op.

    Args:
        env_name: Name of the environment (e.g., "salesforce", "gmail")
        ports: Dict mapping port variable names to allocated port numbers
        env_config: Full env.yaml config dict
        project_name: Docker compose project name (needed to run scripts)
        compose_file: Path to docker-compose.yml (needed to run scripts)
        endpoint_timeout: HTTP request timeout in seconds
        script_timeout: Script execution timeout in seconds
        max_retries: Maximum retry attempts for endpoints

    Raises:
        RuntimeError: If the endpoint reset fails and no usable script
            fallback exists, if scripts are configured without project info,
            or if the script reset itself fails
    """
    env_def = env_config.get("environments", {}).get(env_name, {})
    endpoints_cfg = env_def.get("reset_endpoints", {})
    scripts_cfg = env_def.get("reset_scripts", {})

    # Nothing configured for this environment: skip silently.
    if not (endpoints_cfg or scripts_cfg):
        return

    if endpoints_cfg:
        try:
            await reset_via_endpoints(
                env_name=env_name,
                ports=ports,
                env_config=env_config,
                timeout=endpoint_timeout,
                max_retries=max_retries,
            )
        except RuntimeError as endpoint_error:
            if not scripts_cfg:
                # No fallback exists, so surface the endpoint failure.
                raise
            if not (project_name and compose_file):
                # A fallback exists in config but cannot be run.
                print(f"[RESET] Endpoint reset failed, scripts available but project info not provided", flush=True)
                raise
            print(f"[RESET] Endpoint reset failed ({endpoint_error}), falling back to scripts", flush=True)
        else:
            # Endpoints succeeded; scripts are not needed.
            return

    # Scripts run either as the primary mechanism or as the fallback.
    if not scripts_cfg:
        return
    if not project_name or not compose_file:
        raise RuntimeError(f"Reset scripts configured for {env_name} but project_name/compose_file not provided")
    await reset_via_scripts(
        env_name=env_name,
        project_name=project_name,
        compose_file=compose_file,
        env_config=env_config,
        timeout=script_timeout,
    )
220
+
221
+
222
async def reset_all_environments(
    ports: Dict[str, int],
    env_config: Dict[str, Any],
    project_name_func: Optional[callable] = None,
    compose_file_func: Optional[callable] = None,
    endpoint_timeout: int = 5,
    max_retries: int = 3,
) -> int:
    """
    Reset every environment that owns at least one of the allocated ports.

    An environment is selected when any key of ``ports`` appears in that
    environment's ``ports`` section of ``env_config``. Selected environments
    without reset endpoints or scripts are skipped. A failure in one
    environment is logged and does not stop the others.

    Args:
        ports: Dict mapping port variable names to allocated port numbers
            (e.g., {"SALESFORCE_API_PORT": 8080, "GMAIL_AUTH_PORT": 8030})
        env_config: Full env.yaml config dict
        project_name_func: Optional callable(env_name) -> project_name for script fallback
        compose_file_func: Optional callable(env_name) -> compose_file path for script fallback
        endpoint_timeout: HTTP request timeout in seconds (shorter for batch reset)
        max_retries: Maximum retry attempts (fewer for batch reset)

    Returns:
        Number of environments successfully reset
    """
    environments = env_config.get("environments", {})

    # Port variable name -> owning environment (a later definition wins on clashes).
    owner_by_port_var: Dict[str, str] = {
        port_var: name
        for name, definition in environments.items()
        for port_var in definition.get("ports", {}).keys()
    }

    # Environments that have at least one allocated port.
    selected = {
        owner_by_port_var[port_var]
        for port_var in ports.keys()
        if port_var in owner_by_port_var
    }

    succeeded = 0
    for env_name in selected:
        definition = environments.get(env_name, {})
        if not definition.get("reset_endpoints", {}) and not definition.get("reset_scripts", {}):
            continue

        # Project info is only available when the caller supplied resolvers.
        project_name = None if not project_name_func else project_name_func(env_name)
        compose_file = None if not compose_file_func else compose_file_func(env_name)

        try:
            await reset_environment(
                env_name=env_name,
                ports=ports,
                env_config=env_config,
                project_name=project_name,
                compose_file=compose_file,
                endpoint_timeout=endpoint_timeout,
                max_retries=max_retries,
            )
        except Exception as exc:
            print(f"[RESET] Failed to reset {env_name}: {exc}", flush=True)
        else:
            succeeded += 1

    return succeeded
294
+
295
+
296
def reset_all_environments_sync(
    ports: Dict[str, int],
    env_config: Dict[str, Any],
    project_name_func: Optional[callable] = None,
    compose_file_func: Optional[callable] = None,
    endpoint_timeout: int = 5,
    max_retries: int = 3,
) -> int:
    """
    Synchronous wrapper for reset_all_environments.

    Useful for calling from synchronous code (like query_victim.exec).

    The current thread's event loop is reused when one exists and is open.
    Otherwise (e.g. in a worker thread, or on Python versions where
    ``asyncio.get_event_loop()`` no longer creates a loop implicitly) a
    temporary loop is created for this call and closed afterwards.

    Args:
        ports: Dict mapping port variable names to allocated port numbers
        env_config: Full env.yaml config dict
        project_name_func: Optional callable(env_name) -> project_name
        compose_file_func: Optional callable(env_name) -> compose_file path
        endpoint_timeout: HTTP request timeout in seconds
        max_retries: Maximum retry attempts for endpoints

    Returns:
        Number of environments successfully reset

    Raises:
        RuntimeError: If called while the current thread's loop is already
            running (raised by ``run_until_complete``)
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop in this thread.
        loop = None

    owns_loop = loop is None or loop.is_closed()
    if owns_loop:
        loop = asyncio.new_event_loop()

    try:
        return loop.run_until_complete(
            reset_all_environments(
                ports=ports,
                env_config=env_config,
                project_name_func=project_name_func,
                compose_file_func=compose_file_func,
                endpoint_timeout=endpoint_timeout,
                max_retries=max_retries,
            )
        )
    finally:
        # Only close a loop created here; never one owned by the caller.
        if owns_loop:
            loop.close()
@@ -0,0 +1,370 @@
1
+ import grp
2
+ import os
3
+ import socket
4
+ import subprocess
5
+ import threading
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Set
9
+ import random
10
+
11
+
12
+ DEFAULT_PORT_START = 8000
13
+ DEFAULT_PORT_END = 20000
14
+
15
+
16
def _needs_sudo_for_docker() -> bool:
    """Report whether docker commands have to be prefixed with sudo.

    Returns False when a plain ``docker ps`` exits successfully, or when the
    current process belongs to the ``docker`` group; True otherwise.
    """
    # Direct probe: if `docker ps` works as-is, sudo is not required.
    try:
        probe = subprocess.run(["docker", "ps"], capture_output=True, timeout=5)
    except (subprocess.SubprocessError, FileNotFoundError):
        probe = None
    if probe is not None and probe.returncode == 0:
        return False

    # Fallback: membership of the "docker" group.
    try:
        in_docker_group = grp.getgrnam("docker").gr_gid in os.getgroups()
    except (KeyError, OSError):
        in_docker_group = False
    return not in_docker_group
38
+
39
+
40
@dataclass
class TaskResources:
    """Record of everything allocated on behalf of one task.

    Attributes:
        task_id: Identifier of the task that owns these resources.
        ports: Port allocations, mapping variable name to port number.
        docker_projects: Docker compose projects started for this task.
        compose_files: Compose file path for each project name, kept so the
            project can be torn down later.
    """

    task_id: str
    ports: Dict[str, int] = field(default_factory=dict)
    docker_projects: List[str] = field(default_factory=list)
    compose_files: Dict[str, Path] = field(default_factory=dict)
50
+
51
+
52
class ResourceManager:
    """
    Singleton in-process manager for ports and Docker resources.

    Keeps, per task ID, the ports that were handed out and the Docker compose
    projects that were registered, so that everything belonging to a task can
    be released together.

    Thread-safe for concurrent access from parallel tasks: every public
    method that touches the bookkeeping acquires an internal lock.
    """

    _instance: Optional["ResourceManager"] = None
    _lock = threading.Lock()

    def __init__(self):
        self._tasks: Dict[str, TaskResources] = {}
        self._used_ports: Set[int] = set()
        self._port_range = self._get_port_range()
        self._mutex = threading.Lock()

    @classmethod
    def instance(cls) -> "ResourceManager":
        """Get or create the singleton instance."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have created
                # the instance between the first check and the acquire.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    @classmethod
    def reset(cls) -> None:
        """Reset the singleton (for testing).

        Cleans up every tracked task on a best-effort basis (cleanup errors
        are ignored) and then drops the instance.
        """
        with cls._lock:
            if cls._instance is not None:
                # Cleanup all tasks before reset
                for task_id in list(cls._instance._tasks.keys()):
                    try:
                        cls._instance.cleanup_task(task_id)
                    except Exception:
                        pass
                cls._instance = None

    def _get_port_range(self) -> tuple:
        """Get port range from environment or use defaults.

        ``DT_PORT_RANGE`` ("start-end") takes precedence; when it is unset or
        malformed, ``DT_PORT_RANGE_START`` / ``DT_PORT_RANGE_END`` are used,
        falling back to the module defaults.

        Returns:
            A ``(start, end)`` tuple of ints.
        """
        env_range = os.getenv("DT_PORT_RANGE")
        if env_range:
            try:
                start_str, end_str = env_range.split("-", 1)
                return (int(start_str.strip()), int(end_str.strip()))
            except ValueError:
                # Missing "-" or non-numeric bounds: use the other variables.
                pass

        start = int(os.getenv("DT_PORT_RANGE_START", str(DEFAULT_PORT_START)))
        end = int(os.getenv("DT_PORT_RANGE_END", str(DEFAULT_PORT_END)))
        return (start, end)

    @staticmethod
    def _parse_docker_host_ports(ps_output: str) -> Set[int]:
        """Extract published host ports from ``docker ps --format {{.Ports}}`` output.

        Handles entries such as ``0.0.0.0:8080->8080/tcp``,
        ``[::]:8080->8080/tcp`` and published ranges such as
        ``0.0.0.0:9000-9002->9000-9002/tcp``. Entries without a host mapping
        (no ``->``) are ignored.
        """
        host_ports: Set[int] = set()
        for line in ps_output.splitlines():
            # A container line may list several comma-separated mappings.
            for mapping in line.split(","):
                # Format: HOST_IP:HOST_PORT->CONTAINER_PORT/PROTO
                host_part, arrow, _ = mapping.strip().partition("->")
                if not arrow or ":" not in host_part:
                    continue
                # The host port (or port range) follows the last colon.
                port_spec = host_part.split(":")[-1]
                first, _, last = port_spec.partition("-")
                try:
                    low = int(first)
                    high = int(last) if last else low
                except ValueError:
                    continue
                host_ports.update(range(low, high + 1))
        return host_ports

    def _docker_host_ports(self) -> Set[int]:
        """Return host ports currently published by Docker containers.

        Best-effort: returns an empty set when docker cannot be run, times
        out, or exits with a non-zero status.
        """
        cmd = ["docker", "ps", "--format", "{{.Ports}}"]
        try:
            if _needs_sudo_for_docker():
                cmd = ["sudo"] + cmd
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=2,
            )
        except (subprocess.TimeoutExpired, OSError):
            return set()
        if result.returncode != 0:
            return set()
        return self._parse_docker_host_ports(result.stdout)

    def _is_port_available(
        self, port: int, docker_ports: Optional[Set[int]] = None
    ) -> bool:
        """Check if a port is available on localhost.

        Performs comprehensive checks:
        1. Internal check against already-allocated ports
        2. Check Docker containers for port mappings
        3. Check if something is listening on IPv4/IPv6
        4. Attempt to bind to the port on IPv4 and, where the host has an
           IPv6 stack, on IPv6

        Args:
            port: Port number to test.
            docker_ports: Optional pre-computed set of host ports published
                by Docker. When omitted, Docker is queried by this call.

        Returns:
            True if every check passes, False otherwise.
        """
        # Check internal tracking first
        if port in self._used_ports:
            return False

        # Check if Docker has containers using this port (even if not listening yet)
        if docker_ports is None:
            docker_ports = self._docker_host_ports()
        if port in docker_ports:
            return False

        # Check if something is already listening (IPv4, then IPv6).
        # Failures of the probe itself are ignored.
        for family, loopback in (
            (socket.AF_INET, "127.0.0.1"),
            (socket.AF_INET6, "::1"),
        ):
            try:
                with socket.socket(family, socket.SOCK_STREAM) as sock:
                    sock.settimeout(0.1)
                    if sock.connect_ex((loopback, port)) == 0:
                        return False
            except OSError:
                pass

        # Try to bind on IPv4
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.bind(("0.0.0.0", port))
        except OSError:
            return False

        # Try to bind on IPv6 (Docker often binds to IPv6).
        # If an IPv6 socket cannot even be created, the host has no usable
        # IPv6 stack; treating that as "port busy" would make every port
        # look taken, so the IPv4 result stands instead.
        try:
            sock6 = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        except (OSError, AttributeError):
            return True
        with sock6:
            try:
                sock6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
                sock6.bind(("::", port))
            except (OSError, AttributeError):
                return False

        return True

    def _get_or_create_task(self, task_id: str) -> "TaskResources":
        """Get or create task resources. Callers hold ``self._mutex``."""
        if task_id not in self._tasks:
            self._tasks[task_id] = TaskResources(task_id=task_id)
        return self._tasks[task_id]

    def allocate_port(self, task_id: str, var_name: str, default: Optional[int] = None) -> int:
        """
        Allocate a port for a task.

        Repeated calls with the same ``task_id`` and ``var_name`` return the
        port that was allocated the first time.

        Args:
            task_id: Unique identifier for the task
            var_name: Environment variable name (e.g., "GMAIL_AUTH_PORT")
            default: Default port to try first (optional)

        Returns:
            Allocated port number

        Raises:
            RuntimeError: If no port could be allocated
        """
        with self._mutex:
            task = self._get_or_create_task(task_id)

            # If already allocated for this task, return it
            if var_name in task.ports:
                return task.ports[var_name]

            # Query Docker once per allocation, not once per candidate port.
            docker_ports = self._docker_host_ports()

            # Try default port first if specified
            if default is not None:
                if self._is_port_available(default, docker_ports):
                    task.ports[var_name] = default
                    self._used_ports.add(default)
                    print(f"[PORT] Allocated default port {default} for {task_id}:{var_name}", flush=True)
                    return default
                else:
                    print(f"[PORT] Default port {default} unavailable for {task_id}:{var_name}, trying random", flush=True)

            # Find a free port in range
            start, end = self._port_range
            attempts = min(1000, end - start)

            for _ in range(attempts):
                port = random.randint(start, end)
                if self._is_port_available(port, docker_ports):
                    task.ports[var_name] = port
                    self._used_ports.add(port)
                    print(f"[PORT] Allocated random port {port} for {task_id}:{var_name}", flush=True)
                    return port

            # Sequential scan as fallback
            for port in range(start, end + 1):
                if self._is_port_available(port, docker_ports):
                    task.ports[var_name] = port
                    self._used_ports.add(port)
                    return port

            raise RuntimeError(f"Unable to allocate port for {var_name} in range [{start}, {end}]")

    def get_port(self, task_id: str, var_name: str) -> Optional[int]:
        """Get an allocated port for a task (returns None if not allocated)."""
        with self._mutex:
            task = self._tasks.get(task_id)
            if task:
                return task.ports.get(var_name)
            return None

    def get_all_ports(self, task_id: str) -> Dict[str, int]:
        """Get a copy of all allocated ports for a task (empty if unknown)."""
        with self._mutex:
            task = self._tasks.get(task_id)
            if task:
                return dict(task.ports)
            return {}

    def register_docker_project(
        self,
        task_id: str,
        project_name: str,
        compose_file: Path
    ) -> None:
        """Register a Docker compose project for a task.

        Registering the same project again is a no-op; the compose file
        recorded first is kept.
        """
        with self._mutex:
            task = self._get_or_create_task(task_id)
            if project_name not in task.docker_projects:
                task.docker_projects.append(project_name)
                task.compose_files[project_name] = compose_file

    def get_docker_projects(self, task_id: str) -> List[str]:
        """Get a copy of all Docker project names for a task (empty if unknown)."""
        with self._mutex:
            task = self._tasks.get(task_id)
            if task:
                return list(task.docker_projects)
            return []

    def cleanup_task(self, task_id: str, verbose: bool = True) -> None:
        """
        Cleanup all resources for a task.

        This will:
        1. Stop and remove all Docker compose projects for this task
        2. Release all allocated ports

        Unknown task IDs are ignored. Errors while stopping a project are
        reported (when ``verbose``) and do not abort the cleanup. The
        internal lock is held for the whole operation, including the
        ``docker compose down`` calls.
        """
        with self._mutex:
            task = self._tasks.get(task_id)
            if not task:
                return

            # Teardown Docker projects
            for project_name in task.docker_projects:
                compose_file = task.compose_files.get(project_name)
                if compose_file and compose_file.exists():
                    try:
                        if verbose:
                            print(f"[CLEANUP] Stopping Docker project: {project_name}")
                        subprocess.run(
                            [
                                "docker", "compose",
                                "-p", project_name,
                                "-f", str(compose_file),
                                "down", "--remove-orphans"
                            ],
                            cwd=str(compose_file.parent),
                            capture_output=True,
                            check=False,
                            timeout=60,
                        )
                    except Exception as e:
                        if verbose:
                            print(f"[CLEANUP] Error stopping {project_name}: {e}")

            # Release ports
            for port in task.ports.values():
                self._used_ports.discard(port)

            if verbose and task.ports:
                print(f"[CLEANUP] Released {len(task.ports)} port(s) for task {task_id}")

            # Remove task from tracking
            del self._tasks[task_id]

    def cleanup_all(self, verbose: bool = True) -> None:
        """Cleanup all tracked resources."""
        # Snapshot the IDs and release the lock first: cleanup_task acquires
        # the same non-reentrant lock itself.
        with self._mutex:
            task_ids = list(self._tasks.keys())

        for task_id in task_ids:
            self.cleanup_task(task_id, verbose=verbose)

    def snapshot(self) -> Dict[str, dict]:
        """Get a snapshot of all tracked resources (for debugging).

        Returns:
            A mapping of task ID to ``{"ports": ..., "docker_projects": ...}``
            holding copies of the tracked data.
        """
        with self._mutex:
            return {
                task_id: {
                    "ports": dict(task.ports),
                    "docker_projects": list(task.docker_projects),
                }
                for task_id, task in self._tasks.items()
            }
353
+
354
+
355
def generate_task_id(task_dir: Path) -> str:
    """Build a task ID of the form ``<directory name>_<current PID>``."""
    return f"{task_dir.name}_{os.getpid()}"
360
+
361
+
362
def generate_project_name(task_id: str, env_name: Optional[str] = None) -> str:
    """Generate a Docker compose project name.

    The name is ``wf_<task_id>``, with ``_<env_name>`` appended when an
    environment name is given. The whole name, including the environment
    suffix, is lowercased, and every character other than an alphanumeric,
    ``-`` or ``_`` is replaced by ``_``.

    Args:
        task_id: Task identifier the project belongs to.
        env_name: Optional environment name used as a suffix; None or an
            empty string means no suffix.

    Returns:
        The sanitized project name.
    """
    name = f"wf_{task_id}"
    if env_name:
        name = f"{name}_{env_name}"
    # Sanitize for Docker compose requirements. This runs after the suffix is
    # appended so that an env_name with uppercase letters or other invalid
    # characters cannot produce an invalid project name.
    return "".join(c if c.isalnum() or c in "-_" else "_" for c in name.lower())
370
+