decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,361 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ YFinance - Main Entry Point
4
+
5
+ Provides CLI for starting web server, MCP server, or both.
6
+
7
+ Usage:
8
+ # Start web server only
9
+ python main.py web --port 5000
10
+
11
+ # Start MCP server only
12
+ python main.py mcp --port 8862 --web-url http://localhost:5000
13
+
14
+ # Start both servers
15
+ python main.py both --web-port 5000 --mcp-port 8862
16
+
17
+ # Start with injection enabled
18
+ python main.py web --port 5000 --mode malicious --preset buy_apple_instead
19
+ """
20
+
21
+ import os
22
+ import sys
23
+ import argparse
24
+ from pathlib import Path
25
+
26
# Make the package directory and its parent importable before any local import.
yfinance_dir = Path(__file__).parent
mcp_server_dir = yfinance_dir.parent
# Front of sys.path becomes [yfinance_dir (local imports), mcp_server_dir (yfinance package)].
sys.path[:0] = [str(yfinance_dir), str(mcp_server_dir)]
31
+
32
+
33
def get_paths():
    """Return the default data, template and static directories.

    Every entry is a string path resolved relative to the directory that
    contains this file.
    """
    here = Path(__file__).parent
    layout = {
        'data_dir': ('data', 'cache_2025_h2'),
        'template_dir': ('templates',),
        'static_dir': ('static',),
    }
    return {key: str(here.joinpath(*parts)) for key, parts in layout.items()}
41
+
42
+
43
+
44
def _is_port_open(host: str, port: int) -> bool:
    """Report whether a TCP connection to ``(host, port)`` can be opened.

    The connection attempt uses a one-second timeout; any ``OSError``
    raised while connecting is treated as "port not open".
    """
    import socket

    try:
        probe = socket.create_connection((host, port), timeout=1)
        probe.close()
    except OSError:
        return False
    return True
52
+
53
+
54
def _wait_web_ready(host: str, port: int, timeout: float = 10.0) -> bool:
    """Wait until the web server is accepting HTTP requests (not just TCP).

    Polls ``http://{host}:{port}/api/health`` every 0.25 seconds.

    Args:
        host: Host name or address to probe.
        port: TCP port to probe.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as a probe answers with HTTP 200 or 404, False if the
        timeout elapses first.
    """
    import time
    import httpx

    url = f"http://{host}:{port}/api/health"
    # Use the monotonic clock so wall-clock adjustments (NTP, DST, manual
    # changes) can neither stretch nor cut short the waiting period.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(url, timeout=2.0)
        except Exception:
            # Best-effort probe: any failure simply means "not ready yet".
            pass
        else:
            if resp.status_code in (200, 404):
                # 404 is OK — means Flask is responding, just no /api/health route
                return True
        time.sleep(0.25)
    return False
70
+
71
+
72
def _start_web_in_background(web_port: int, args) -> None:
    """Start a standalone web process if not already running.

    Steps:
      1. If the port is open and answers HTTP within 5 seconds, return.
      2. If the port is open but unresponsive, send SIGTERM to the process(es)
         listening on it (located with ``lsof``) and continue.
      3. Take an exclusive lock file in the temp directory so that only one
         caller spawns the server; callers that lose the race just wait for
         the server to come up.
      4. Spawn ``<this file> web ...`` detached, with output discarded, and
         wait up to 10 seconds for it to answer HTTP.

    Args:
        web_port: Port the web server should listen on.
        args: Parsed CLI namespace. ``host`` and ``mode`` are required;
            ``preset``, ``data_dir`` and ``template_dir`` are forwarded when
            present and truthy.
    """
    import signal
    import subprocess
    import tempfile
    import time

    host = "127.0.0.1"
    if _is_port_open(host, web_port):
        # Web server already running — verify it's actually responsive
        if _wait_web_ready(host, web_port, timeout=5.0):
            return
        # Port open but not responding — kill and restart
        print(f"[!] Web server on port {web_port} not responding, restarting...")
        try:
            # Restrict the lookup to TCP listeners: a bare "-i :PORT" also
            # matches clients that merely hold a connection to the port,
            # and those must not be terminated.
            result = subprocess.run(
                ["lsof", "-t", f"-iTCP:{web_port}", "-sTCP:LISTEN"],
                capture_output=True,
                text=True,
            )
            for pid_text in result.stdout.split():
                pid = int(pid_text)
                if pid != os.getpid():  # never signal ourselves
                    os.kill(pid, signal.SIGTERM)
            time.sleep(1)
        except Exception as e:
            print(f"[!] Failed to kill old web server: {e}")

    # O_CREAT | O_EXCL makes creation atomic: exactly one caller gets the lock.
    # NOTE(review): a lock file left behind by a crashed starter is never
    # reclaimed here; later callers then only wait and return — confirm
    # whether stale-lock recovery is wanted.
    lock_path = Path(tempfile.gettempdir()) / f"finance_web_{web_port}.lock"
    try:
        lock_fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    except FileExistsError:
        lock_fd = None

    if lock_fd is None:
        # Another process is starting the web server; wait for it to be ready.
        if _wait_web_ready(host, web_port, timeout=10.0):
            return
        # Fallback: at least wait for TCP
        for _ in range(20):
            if _is_port_open(host, web_port):
                return
            time.sleep(0.25)
        return

    try:
        # Someone may have brought the server up while we acquired the lock.
        if _is_port_open(host, web_port):
            return

        cmd = [
            sys.executable,
            str(Path(__file__).resolve()),
            "web",
            "--port",
            str(web_port),
            "--host",
            str(args.host),
            "--mode",
            str(args.mode),
        ]
        optional_flags = (
            ("--preset", "preset"),
            ("--data-dir", "data_dir"),
            ("--template-dir", "template_dir"),
        )
        for flag, attr in optional_flags:
            value = getattr(args, attr, None)
            if value:
                cmd += [flag, str(value)]

        # Detach into a new session so the child is not tied to this
        # process's session or controlling terminal.
        subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )

        # Wait for the web server to be fully ready (HTTP-level check).
        if _wait_web_ready(host, web_port, timeout=10.0):
            print(f"[*] Web server started on port {web_port}")
        else:
            print(f"[!] Web server on port {web_port} did not become ready in time")
    finally:
        # Release the lock. The two steps are guarded separately so that a
        # failing close() cannot leave the lock file behind.
        try:
            os.close(lock_fd)
        except OSError:
            pass
        try:
            lock_path.unlink()
        except OSError:
            pass
158
+
159
+
160
def start_web_server(args):
    """Start the Flask web server.

    Besides the CLI options, two environment variables are honoured at
    startup:

    * ``INJECTION_CONFIG`` - JSON passed to ``server.load_injection_config``.
    * ``FINANCE_ACCOUNTS_JSON`` - JSON posted to the app's ``/reset`` route
      through Flask's test client.

    Problems with either variable are reported on stdout and do not prevent
    the server from starting.

    Args:
        args: Parsed CLI namespace providing ``data_dir``, ``host``, ``port``,
            ``debug``, ``mode`` and ``preset``.
    """
    import json
    from server.web import FinanceWebServer

    paths = get_paths()
    server = FinanceWebServer(
        data_dir=args.data_dir or paths['data_dir'],
        host=args.host,
        port=args.port,
        debug=args.debug
    )

    if args.mode == 'malicious' and args.preset:
        print(f"[*] Injection mode: malicious (preset: {args.preset})")
        server._set_injection('malicious', args.preset)
    else:
        print("[*] Injection mode: benign")

    # Static injection: load from INJECTION_CONFIG env var at startup
    injection_config_str = os.environ.get('INJECTION_CONFIG')
    if injection_config_str:
        try:
            injection_config = json.loads(injection_config_str)
            server.load_injection_config(injection_config)
            print("[*] Static injection config loaded from INJECTION_CONFIG env var")
        except json.JSONDecodeError as e:
            print(f"[!] Failed to parse INJECTION_CONFIG: {e}")
        except Exception as e:
            print(f"[!] Failed to load injection config: {e}")

    # Portfolio initialization: load from FINANCE_ACCOUNTS_JSON env var at startup
    accounts_json_str = os.environ.get('FINANCE_ACCOUNTS_JSON')
    if accounts_json_str:
        try:
            accounts_config = json.loads(accounts_json_str)
            # Call the reset endpoint internally
            with server.app.test_client() as client:
                resp = client.post('/reset', json=accounts_config,
                                   content_type='application/json')
                if resp.status_code == 200:
                    print("[*] Portfolio initialized from FINANCE_ACCOUNTS_JSON env var")
                else:
                    print(f"[!] Failed to initialize portfolio: {resp.data.decode()[:100]}")
        except json.JSONDecodeError as e:
            # Only genuine JSON syntax problems are reported as parse errors.
            print(f"[!] Failed to parse FINANCE_ACCOUNTS_JSON: {e}")
        except Exception as e:
            # Anything else went wrong while calling /reset, not while parsing.
            print(f"[!] Failed to initialize portfolio: {e}")

    print(f"[*] Starting web server on port {args.port}")
    server.run()
209
+
210
+
211
def _web_port_from_url(web_url: str) -> int:
    """Return the TCP port that *web_url* addresses.

    Trailing slashes and paths are ignored. When the URL has no explicit
    port the scheme default is used (443 for https, otherwise 80). A bare
    ``host:port`` value without a scheme is accepted as well.

    Raises:
        ValueError: If the port component is not a valid port number.
    """
    from urllib.parse import urlsplit

    # Without "//" urlsplit would read "host:5000" as scheme "host".
    parts = urlsplit(web_url if "://" in web_url else f"//{web_url}")
    if parts.port is not None:
        return parts.port
    return 443 if parts.scheme == "https" else 80


def start_mcp_server(args):
    """Start the MCP server (Docker provides Flask web server).

    With ``--auto-web`` a background web server is started first when none
    is running. If ``FINANCE_ACCOUNTS_JSON`` is set, its JSON content is
    posted to the web server's ``/reset`` endpoint before the MCP server
    starts; failures are reported on stdout and are not fatal.

    Args:
        args: Parsed CLI namespace providing ``web_url`` and ``port``, plus
            the options consumed by ``_start_web_in_background`` when
            ``auto_web`` is set.
    """
    import json
    import httpx
    from server.mcp import FinanceMCPServer

    if getattr(args, 'auto_web', False):
        web_port = _web_port_from_url(args.web_url)
        _start_web_in_background(web_port, args)

    # Portfolio initialization: POST accounts JSON to the web server's /reset endpoint
    accounts_json_str = os.environ.get('FINANCE_ACCOUNTS_JSON')
    if accounts_json_str:
        try:
            accounts_config = json.loads(accounts_json_str)
            # Strip trailing slashes so "http://host:5000/" does not yield "//reset".
            reset_url = f"{args.web_url.rstrip('/')}/reset"
            resp = httpx.post(reset_url, json=accounts_config, timeout=10.0)
            if resp.status_code == 200:
                print("[*] Portfolio initialized from FINANCE_ACCOUNTS_JSON")
            else:
                print(f"[!] Portfolio init failed: {resp.text[:100]}")
        except Exception as e:
            print(f"[!] Failed to initialize portfolio: {e}")

    server = FinanceMCPServer(web_server_url=args.web_url)
    print(f"[*] Starting MCP server (web: {args.web_url})")
    server.run(port=args.port)
237
+
238
+
239
def start_both(args):
    """Start both web and MCP servers.

    The web server runs in a daemon thread of this process; the MCP server
    then runs in the calling thread and is pointed at the web server through
    ``http://127.0.0.1:<web_port>``.

    Args:
        args: Parsed CLI namespace providing ``data_dir``, ``host``,
            ``web_port``, ``mcp_port``, ``mode`` and ``preset``.
    """
    import threading
    from server.web import FinanceWebServer
    from server.mcp import FinanceMCPServer

    paths = get_paths()

    # Start web server in thread
    web_server = FinanceWebServer(
        data_dir=args.data_dir or paths['data_dir'],
        host=args.host,
        port=args.web_port,
        debug=False
    )

    if args.mode == 'malicious' and args.preset:
        print(f"[*] Injection mode: malicious (preset: {args.preset})")
        web_server._set_injection('malicious', args.preset)

    web_thread = threading.Thread(target=web_server.run, daemon=True)
    web_thread.start()

    # Probe for real HTTP readiness instead of sleeping a fixed interval, and
    # only announce the server once it actually answers.
    if _wait_web_ready("127.0.0.1", args.web_port, timeout=10.0):
        print(f"[*] Web server started on port {args.web_port}")
    else:
        print(f"[!] Web server on port {args.web_port} did not become ready in time")

    # Start MCP server
    web_url = f"http://127.0.0.1:{args.web_port}"
    mcp_server = FinanceMCPServer(web_server_url=web_url)
    print(f"[*] Starting MCP server on port {args.mcp_port}")
    mcp_server.run(port=args.mcp_port)
274
+
275
+
276
def list_presets(args):
    """Print the name and description of every available attack preset.

    Args:
        args: Parsed CLI namespace; not used by this command.
    """
    # Aliased so the imported helper does not shadow this function's own name.
    from injection import list_presets as load_presets

    catalog = load_presets()
    print("\nAvailable Attack Presets:", "=" * 60, sep="\n")
    for preset_name, summary in catalog.items():
        print(f" {preset_name:25} - {summary}")
    print()
286
+
287
+
288
def main(argv=None):
    """Parse the command line and run the selected sub-command.

    Sub-commands: ``web``, ``mcp``, ``both`` and ``presets``. When no
    sub-command is given the help text is printed.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``, which is the
            behaviour of a plain ``main()`` call.
    """
    parser = argparse.ArgumentParser(
        description="YFinance - Financial Web Server for Agent Testing",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Start web server
  python main.py web --port 5000

  # Start with injection
  python main.py web --mode malicious --preset buy_apple_instead

  # Start MCP server
  python main.py mcp --web-url http://localhost:5000

  # Start both
  python main.py both --web-port 5000 --mcp-port 8862

  # List presets
  python main.py presets
"""
    )
    subparsers = parser.add_subparsers(dest='command', help='Command to run')

    # Web server command
    web_parser = subparsers.add_parser('web', help='Start web server')
    web_parser.add_argument('--host', default='0.0.0.0', help='Host to bind')
    web_parser.add_argument('--port', type=int, default=5000, help='Port (default: 5000)')
    web_parser.add_argument('--debug', action='store_true', help='Enable debug mode')
    web_parser.add_argument('--mode', choices=['benign', 'malicious'], default='benign')
    web_parser.add_argument('--preset', help='Attack preset name')
    web_parser.add_argument('--data-dir', help='Data directory')
    web_parser.add_argument('--template-dir', help='Template directory')

    # MCP server command
    mcp_parser = subparsers.add_parser('mcp', help='Start MCP server')
    mcp_parser.add_argument('--port', type=int, default=8862, help='Port (default: 8862)')
    mcp_parser.add_argument('--web-url', default='http://127.0.0.1:5000', help='Web server URL')
    mcp_parser.add_argument('--auto-web', action='store_true', help='Start web server if not running')
    mcp_parser.add_argument('--host', default='0.0.0.0', help='Web host when auto-starting')
    mcp_parser.add_argument('--mode', choices=['benign', 'malicious'], default='benign')
    mcp_parser.add_argument('--preset', help='Attack preset name')
    mcp_parser.add_argument('--data-dir', help='Data directory')

    # Both servers command
    both_parser = subparsers.add_parser('both', help='Start both servers')
    both_parser.add_argument('--host', default='0.0.0.0', help='Host to bind')
    both_parser.add_argument('--web-port', type=int, default=5000, help='Web server port')
    both_parser.add_argument('--mcp-port', type=int, default=8862, help='MCP server port')
    both_parser.add_argument('--mode', choices=['benign', 'malicious'], default='benign')
    both_parser.add_argument('--preset', help='Attack preset name')
    both_parser.add_argument('--data-dir', help='Data directory')
    both_parser.add_argument('--template-dir', help='Template directory')

    # List presets command
    subparsers.add_parser('presets', help='List attack presets')

    args = parser.parse_args(argv)

    if args.command == 'web':
        start_web_server(args)
    elif args.command == 'mcp':
        start_mcp_server(args)
    elif args.command == 'both':
        start_both(args)
    elif args.command == 'presets':
        list_presets(args)
    else:
        # No sub-command given.
        parser.print_help()


if __name__ == '__main__':
    main()
361
+
@@ -0,0 +1,21 @@
1
#!/usr/bin/env python3
"""Launch the finance MCP server, configured via WEB_URL and PORT."""
import os
import sys
from pathlib import Path

# Prepend this directory, then its parent, to the import search path
# before the project import below runs.
base_dir = Path(__file__).parent
sys.path[:0] = [str(base_dir), str(base_dir.parent)]

from server.mcp import FinanceMCPServer

# Environment overrides; the defaults are the local web server URL and
# MCP port 8862.
web_url = os.environ.get("WEB_URL", "http://127.0.0.1:5000")
port = int(os.environ.get("PORT", "8862"))

print(f"Web server URL: {web_url}")
print(f"Starting MCP server on port {port}...")

server = FinanceMCPServer(web_server_url=web_url)
server.run(port=port)
@@ -0,0 +1,26 @@
1
#!/usr/bin/env python3
"""Launch the finance web server on 0.0.0.0:5000 from the cache_2025_h2 data."""
import os
import sys
from pathlib import Path

# Prepend this directory, then its parent, to the import search path
# before the project import below runs.
base_dir = Path(__file__).parent
sys.path[:0] = [str(base_dir), str(base_dir.parent)]

# cache_2025_h2 is the correct data directory for this launcher.
data_dir = str(base_dir.joinpath('data/cache_2025_h2'))

# Report the resolved location so a missing directory is visible at startup.
print(f"Data dir: {data_dir}")
print(f"Data dir exists: {os.path.exists(data_dir)}")

# Imported only after sys.path has been extended above.
from server.web import FinanceWebServer

server = FinanceWebServer(data_dir=data_dir, host='0.0.0.0', port=5000, debug=False)
print("Starting server on port 5000...")
server.run()
@@ -0,0 +1,41 @@
1
+ """
2
+ Server Module - Web Server and MCP Servers
3
+
4
+ Components:
5
+ FinanceWebServer - Flask web application (serves HTML pages)
6
+ FinanceMCPServer - MCP browser server (for AI agents to browse)
7
+ InjectionMCPServer - MCP injection control server (for red team testing)
8
+ create_app - Factory function for Flask app
9
+ create_mcp_server - Factory function for browser MCP server
10
+ create_injection_mcp_server - Factory function for injection MCP server
11
+
12
+ Note: Imports are lazy to allow the web server to run independently
13
+ (e.g., inside Docker) without MCP dependencies like httpx/mcp.
14
+ """
15
+
16
+
17
def __getattr__(name):
    """Resolve a public package attribute on first access.

    Each exported name is imported from its submodule only when requested,
    so web-only usage does not pull in the MCP dependencies.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The class or factory function exported under ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the exported names.
    """
    # Web server exports.
    if name == 'FinanceWebServer' or name == 'create_app':
        from .web import FinanceWebServer, create_app
        return FinanceWebServer if name == 'FinanceWebServer' else create_app

    # Browser MCP server exports.
    if name in ('FinanceMCPServer', 'ActionLogger', 'create_mcp_server'):
        from .mcp import FinanceMCPServer, ActionLogger, create_mcp_server
        mcp_exports = dict(
            FinanceMCPServer=FinanceMCPServer,
            ActionLogger=ActionLogger,
            create_mcp_server=create_mcp_server,
        )
        return mcp_exports[name]

    # Injection MCP server exports.
    if name == 'InjectionMCPServer' or name == 'create_injection_mcp_server':
        from .injection_mcp import InjectionMCPServer, create_injection_mcp_server
        if name == 'InjectionMCPServer':
            return InjectionMCPServer
        return create_injection_mcp_server

    # HTML extractor export.
    if name == 'HtmlExtractor':
        from .extractor import HtmlExtractor
        return HtmlExtractor

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
34
+
35
+
36
# Public API of the package; each name is resolved lazily by the
# module-level __getattr__ rather than imported eagerly.
__all__ = [
    # .web
    'FinanceWebServer',
    'create_app',
    # .mcp
    'FinanceMCPServer',
    'ActionLogger',
    'create_mcp_server',
    # .injection_mcp
    'InjectionMCPServer',
    'create_injection_mcp_server',
    # .extractor
    'HtmlExtractor',
]