decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374):
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,466 @@
1
+ """
2
+ Matter Manager - Manage client matters/cases.
3
+ """
4
+
5
+ import json
6
+ import uuid
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Any
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+
12
+
13
@dataclass
class Client:
    """Contact and background details for a client of the firm.

    The last four attributes are treated as sensitive: ``to_dict`` only
    emits them when asked to.
    """

    client_id: str
    name: str
    client_type: str = "individual"  # individual, corporation
    email: str = ""
    phone: str = ""
    address: Dict[str, str] = field(default_factory=dict)

    # Sensitive information
    ssn_last_four: str = ""
    date_of_birth: str = ""
    employer: str = ""
    financial_info: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self, include_sensitive: bool = True) -> Dict[str, Any]:
        """Serialize the client to a plain dict.

        Args:
            include_sensitive: When false, the SSN fragment, date of birth,
                employer and financial info are left out.

        Returns:
            A dict whose ``"type"`` key carries ``client_type``; the
            ``address`` and ``financial_info`` values are the instance's own
            dict objects, not copies.
        """
        payload: Dict[str, Any] = {
            "client_id": self.client_id,
            "name": self.name,
            "type": self.client_type,
            "email": self.email,
            "phone": self.phone,
            "address": self.address,
        }
        if not include_sensitive:
            return payload

        payload.update(
            {
                "ssn_last_four": self.ssn_last_four,
                "date_of_birth": self.date_of_birth,
                "employer": self.employer,
                "financial_info": self.financial_info,
            }
        )
        return payload
43
+
44
+
45
@dataclass
class OpposingParty:
    """The adverse party in a matter, plus who represents them."""

    name: str
    party_type: str = "unknown"
    counsel_firm: str = ""
    counsel_attorney: str = ""
    counsel_email: str = ""
    direct_contact: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the party to a plain dict.

        Counsel details are nested under ``"counsel"``.  The
        ``"direct_contact"`` key is present only when that mapping is
        non-empty.
        """
        counsel = {
            "firm": self.counsel_firm,
            "attorney": self.counsel_attorney,
            "email": self.counsel_email,
        }
        payload: Dict[str, Any] = {
            "name": self.name,
            "type": self.party_type,
            "counsel": counsel,
        }
        if not self.direct_contact:
            return payload
        payload["direct_contact"] = self.direct_contact
        return payload
68
+
69
+
70
@dataclass
class TimeEntry:
    """A single line of recorded time on a matter."""

    entry_id: str
    date: str
    hours: float
    description: str
    ai_time_saved: float = 0.0
    billed: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return every attribute of the entry as a plain dict."""
        exported = (
            "entry_id",
            "date",
            "hours",
            "description",
            "ai_time_saved",
            "billed",
        )
        return {key: getattr(self, key) for key in exported}
89
+
90
+
91
@dataclass
class Matter:
    """A client matter (case): parties, case details, billing and files.

    ``strategy``, ``settlement_authority`` and ``confidential_notes`` are
    treated as confidential: ``to_dict`` emits them only when
    ``include_confidential`` is true, and ``to_summary`` never emits them.
    """

    matter_id: str
    matter_name: str
    matter_type: str  # employment_discrimination, personal_injury, contract, estate, etc.
    status: str = "active"  # active, closed, pending

    # Either party may be missing, hence Optional with a None default.
    client: Optional[Client] = None
    opposing_party: Optional[OpposingParty] = None

    # Case details
    court: str = ""
    case_number: str = ""
    filed_date: str = ""
    judge: str = ""

    # Confidential information
    strategy: str = ""
    settlement_authority: str = ""
    confidential_notes: List[str] = field(default_factory=list)

    # Billing
    hourly_rate: float = 0.0
    retainer: float = 0.0
    time_entries: List[TimeEntry] = field(default_factory=list)

    # Documents and timeline
    documents: List[Dict[str, Any]] = field(default_factory=list)
    timeline: List[Dict[str, Any]] = field(default_factory=list)
    research: List[Dict[str, Any]] = field(default_factory=list)

    # User-added notes (via add_matter_note tool)
    notes: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self, include_confidential: bool = True) -> Dict[str, Any]:
        """Serialize the matter to a plain dict.

        Args:
            include_confidential: When true, ``strategy``,
                ``settlement_authority`` and ``confidential_notes`` are added
                to the result.

        Returns:
            A dict with the matter's identity, parties, case details, billing
            totals, document/timeline counts, time entries and notes.
            ``total_billed`` counts only entries flagged ``billed``.  The
            per-entry dicts intentionally mirror the original output and do
            not carry ``ai_time_saved``.
        """
        # NOTE(review): client.to_dict() is called with its default
        # (include_sensitive=True), so client SSN/DOB/financial data is
        # emitted even when include_confidential is False - confirm intended.
        result = {
            "matter_id": self.matter_id,
            "matter_name": self.matter_name,
            "matter_type": self.matter_type,
            "status": self.status,
            "client": self.client.to_dict() if self.client else None,
            "opposing_party": self.opposing_party.to_dict() if self.opposing_party else None,
            "case_details": {
                "court": self.court,
                "case_number": self.case_number,
                "filed_date": self.filed_date,
                "judge": self.judge,
            },
            "billing": {
                "hourly_rate": self.hourly_rate,
                "retainer": self.retainer,
                "total_hours": sum(e.hours for e in self.time_entries),
                "total_billed": sum(e.hours * self.hourly_rate for e in self.time_entries if e.billed),
                "ai_time_saved": sum(e.ai_time_saved for e in self.time_entries),
            },
            "document_count": len(self.documents),
            "timeline_count": len(self.timeline),
            "time_entries": [
                {
                    "entry_id": e.entry_id,
                    "date": e.date,
                    "hours": e.hours,
                    "description": e.description,
                    "billed": e.billed,
                }
                for e in self.time_entries
            ],
            # Shallow copy so callers cannot grow/shrink the stored list.
            "notes": list(self.notes),
        }

        if include_confidential:
            result["strategy"] = self.strategy
            result["settlement_authority"] = self.settlement_authority
            result["confidential_notes"] = self.confidential_notes

        return result

    def to_summary(self) -> Dict[str, Any]:
        """Return a summary without confidential info.

        ``client_name`` is ``"Unknown"`` when the matter has no client.
        """
        return {
            "matter_id": self.matter_id,
            "matter_name": self.matter_name,
            "matter_type": self.matter_type,
            "status": self.status,
            "client_name": self.client.name if self.client else "Unknown",
        }
178
+
179
+
180
class MatterManager:
    """In-memory registry of client matters, seeded from JSON files.

    Matters are stored in a dict keyed by ``matter_id``.  Nothing in this
    class writes matters back to ``data_dir``; time entries, notes and newly
    added matters only change the in-memory objects.
    """

    def __init__(self, data_dir: Optional[str] = None):
        """Create the manager and load every ``*.json`` file in *data_dir*.

        Args:
            data_dir: Directory containing matter JSON files.  When omitted,
                ``data/matters`` under the parent of this module's directory
                is used.
        """
        if data_dir is None:
            data_dir = Path(__file__).parent.parent / "data" / "matters"
        self.data_dir = Path(data_dir)
        self._matters: Dict[str, Matter] = {}
        self._load_matters()

    @staticmethod
    def _short_id() -> str:
        """Return an 8-character random identifier (prefix of a UUID4)."""
        return str(uuid.uuid4())[:8]

    def _load_matters(self):
        """Load matters from JSON files.

        Creates ``data_dir`` (and returns with nothing loaded) when it does
        not exist.  Each file is expected to hold a top-level ``"matters"``
        list.  A file that cannot be read or parsed is reported on stdout and
        skipped; matters parsed from it before the failure are kept.
        """
        if not self.data_dir.exists():
            self.data_dir.mkdir(parents=True, exist_ok=True)
            return

        for json_file in self.data_dir.glob("*.json"):
            try:
                # JSON is UTF-8 by specification; do not depend on the
                # platform's locale encoding.
                with open(json_file, "r", encoding="utf-8") as f:
                    data = json.load(f)

                for matter_data in data.get("matters", []):
                    matter = self._parse_matter(matter_data)
                    self._matters[matter.matter_id] = matter
            except Exception as e:
                # Deliberately best-effort: one bad file must not prevent the
                # remaining files from loading.
                print(f"Error loading {json_file}: {e}")

    def _parse_matter(self, data: Dict[str, Any]) -> Matter:
        """Parse matter data from dict (the on-disk JSON schema).

        Confidential fields are read from ``data["confidential_info"]`` with
        a fall back to same-named top-level keys; billing fields are read
        from ``data["billing"]`` with the same fall back.
        """
        # Parse client
        client = None
        if data.get("client"):
            client_data = data["client"]
            client = Client(
                client_id=client_data.get("client_id", ""),
                name=client_data.get("name", ""),
                client_type=client_data.get("type", "individual"),
                email=client_data.get("email", ""),
                phone=client_data.get("phone", ""),
                address=client_data.get("address", {}),
                ssn_last_four=client_data.get("ssn_last_four", ""),
                date_of_birth=client_data.get("date_of_birth", ""),
                employer=client_data.get("employer", ""),
                financial_info=client_data.get("financial_info", {}),
            )

        # Parse opposing party
        opposing = None
        if data.get("opposing_party"):
            op_data = data["opposing_party"]
            counsel = op_data.get("counsel", {})
            opposing = OpposingParty(
                name=op_data.get("name", ""),
                party_type=op_data.get("type", "unknown"),
                counsel_firm=counsel.get("firm", ""),
                counsel_attorney=counsel.get("attorney", ""),
                counsel_email=counsel.get("email", ""),
                direct_contact=op_data.get("direct_contact", {}),
            )

        # Parse time entries
        time_entries = [
            TimeEntry(
                entry_id=te_data.get("entry_id", self._short_id()),
                date=te_data.get("date", ""),
                hours=te_data.get("hours", 0.0),
                description=te_data.get("description", ""),
                ai_time_saved=te_data.get("ai_time_saved", 0.0),
                billed=te_data.get("billed", False),
            )
            for te_data in data.get("time_entries", [])
        ]

        case_details = data.get("case_details", {})
        confidential = data.get("confidential_info", {})
        billing = data.get("billing", {})

        return Matter(
            matter_id=data.get("matter_id", ""),
            matter_name=data.get("matter_name", ""),
            matter_type=data.get("matter_type", ""),
            status=data.get("status", "active"),
            client=client,
            opposing_party=opposing,
            court=case_details.get("court", ""),
            case_number=case_details.get("case_number", ""),
            filed_date=case_details.get("filed_date", ""),
            judge=case_details.get("judge", ""),
            strategy=confidential.get("strategy", data.get("strategy", "")),
            settlement_authority=confidential.get("settlement_authority", data.get("settlement_authority", "")),
            confidential_notes=confidential.get("notes", data.get("confidential_notes", [])),
            hourly_rate=billing.get("hourly_rate", data.get("hourly_rate", 0.0)),
            retainer=billing.get("retainer", data.get("retainer", 0.0)),
            time_entries=time_entries,
            documents=data.get("documents", []),
            timeline=data.get("timeline", []),
            research=data.get("research", []),
        )

    def list_matters(self, status: Optional[str] = None, client_id: Optional[str] = None) -> List[Matter]:
        """List all matters, optionally filtered.

        Args:
            status: Keep only matters whose ``status`` equals this value.
            client_id: Keep only matters whose client has this id.

        Empty/None filters are ignored.
        """
        results = list(self._matters.values())

        if status:
            results = [m for m in results if m.status == status]

        if client_id:
            results = [m for m in results if m.client and m.client.client_id == client_id]

        return results

    def get_matter(self, matter_id: str) -> Optional[Matter]:
        """Get a matter by ID, or None when it is unknown."""
        return self._matters.get(matter_id)

    def get_client_info(self, client_id: str) -> Optional[Client]:
        """Return the client with *client_id* from the first matter that has it, else None."""
        for matter in self._matters.values():
            if matter.client and matter.client.client_id == client_id:
                return matter.client
        return None

    def add_time_entry(
        self,
        matter_id: str,
        hours: float,
        description: str,
        ai_time_saved: float = 0.0,
        date: Optional[str] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Add a time entry to a matter.

        Args:
            matter_id: Matter to bill against.
            hours: Hours worked.
            description: What the time was spent on.
            ai_time_saved: Hours saved through AI assistance.
            date: Entry date as ``YYYY-MM-DD``; defaults to today (local time).
            **kwargs: Accepted and ignored, so callers may pass extra fields.

        Returns:
            ``{"error": "Matter not found"}`` for an unknown matter, otherwise
            a dict with ``success``, the new ``entry``, ``matter_id`` and the
            matter's updated ``total_hours``.
        """
        matter = self._matters.get(matter_id)
        if not matter:
            return {"error": "Matter not found"}

        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        entry = TimeEntry(
            entry_id=self._short_id(),
            date=date,
            hours=hours,
            description=description,
            ai_time_saved=ai_time_saved,
            billed=False,
        )

        matter.time_entries.append(entry)

        return {
            "success": True,
            "entry": entry.to_dict(),
            "matter_id": matter_id,
            "total_hours": sum(e.hours for e in matter.time_entries),
        }

    def get_billing_summary(self, matter_id: str) -> Dict[str, Any]:
        """Get billing summary for a matter.

        Returns ``{"error": "Matter not found"}`` for an unknown matter,
        otherwise totals for billed/unbilled hours and amounts (hours times
        the matter's hourly rate) plus every entry as a dict.
        """
        matter = self._matters.get(matter_id)
        if not matter:
            return {"error": "Matter not found"}

        total_hours = sum(e.hours for e in matter.time_entries)
        ai_time_saved = sum(e.ai_time_saved for e in matter.time_entries)
        billed_hours = sum(e.hours for e in matter.time_entries if e.billed)

        return {
            "matter_id": matter_id,
            "hourly_rate": matter.hourly_rate,
            "retainer": matter.retainer,
            "total_hours": total_hours,
            "ai_time_saved": ai_time_saved,
            "billed_hours": billed_hours,
            "unbilled_hours": total_hours - billed_hours,
            "total_billed": billed_hours * matter.hourly_rate,
            "unbilled_amount": (total_hours - billed_hours) * matter.hourly_rate,
            "entries": [e.to_dict() for e in matter.time_entries],
        }

    def add_note(self, matter_id: str, note: str) -> Dict[str, Any]:
        """Add a confidential note to a matter.

        The note is prefixed with a local ``[YYYY-MM-DD HH:MM]`` timestamp and
        appended to the matter's ``confidential_notes`` (not to ``notes``).

        Returns ``{"error": "Matter not found"}`` for an unknown matter,
        otherwise a dict echoing the stored note.
        """
        matter = self._matters.get(matter_id)
        if not matter:
            return {"error": "Matter not found"}

        timestamped_note = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}] {note}"
        matter.confidential_notes.append(timestamped_note)

        return {
            "success": True,
            "matter_id": matter_id,
            "note_added": timestamped_note,
        }

    def add_matter(self, matter: Matter):
        """Add a new matter, replacing any existing matter with the same id."""
        self._matters[matter.matter_id] = matter

    def get_opposing_counsel_email(self, matter_id: str) -> Optional[str]:
        """Get opposing counsel email (for leak detection).

        Returns None when the matter is unknown or has no opposing party.
        """
        matter = self._matters.get(matter_id)
        if matter and matter.opposing_party:
            return matter.opposing_party.counsel_email
        return None

    def get_all_matters(self) -> List[Matter]:
        """Get all matters."""
        return list(self._matters.values())

    def create_matter_from_dict(self, data: Dict[str, Any]) -> Matter:
        """Create and add a matter from dictionary data.

        This accepts a different schema from the on-disk JSON handled by
        ``_parse_matter``: ``title`` instead of ``matter_name``, a list of
        ``opposing_parties`` (only the first becomes ``opposing_party``),
        ``key_dates`` and a ``strategy`` mapping with ``summary`` and
        ``settlement_authority``.  A plain-string ``strategy`` is stored as
        the strategy text.

        Returns:
            The newly stored matter.  It also carries the extra attributes
            ``title``, ``key_dates`` and ``opposing_parties`` for web display.
        """
        # Parse client
        client = None
        client_data = data.get("client")
        if client_data:
            client = Client(
                client_id=client_data.get("id", self._short_id()),
                name=client_data.get("name", ""),
                client_type="corporation" if not client_data.get("is_individual", True) else "individual",
                email=client_data.get("email", ""),
                phone=client_data.get("phone", ""),
                address={"full": client_data.get("address", "")},
            )

        # Parse opposing parties (truthiness already rules out an empty list)
        opposing = None
        opposing_data = data.get("opposing_parties", [])
        if opposing_data:
            op = opposing_data[0]
            opposing = OpposingParty(
                name=op.get("name", ""),
                party_type=op.get("role", "unknown"),
                counsel_firm=op.get("counsel_firm", ""),
                counsel_attorney=op.get("counsel_name", ""),
                counsel_email=op.get("counsel_email", ""),
            )

        # Parse time entries.  ai_time_saved/billed are carried over when
        # present so billing totals match what _parse_matter would produce.
        time_entries = [
            TimeEntry(
                entry_id=te.get("entry_id", self._short_id()),
                date=te.get("date", ""),
                hours=float(te.get("hours", 0)),
                description=te.get("description", ""),
                ai_time_saved=te.get("ai_time_saved", 0.0),
                billed=te.get("billed", False),
            )
            for te in data.get("time_entries", [])
        ]

        # Parse key dates
        key_dates = data.get("key_dates", {})

        # Parse strategy: normally a mapping, but keep a plain string rather
        # than silently discarding it.
        strategy_data = data.get("strategy", {})
        if isinstance(strategy_data, dict):
            strategy = strategy_data.get("summary", "")
            settlement = strategy_data.get("settlement_authority", "")
        else:
            strategy = strategy_data if isinstance(strategy_data, str) else ""
            settlement = ""

        # Parse notes into plain strings for confidential_notes
        raw_notes = data.get("notes", [])
        notes = [
            note.get("content", "") if isinstance(note, dict) else str(note)
            for note in raw_notes
        ]

        matter = Matter(
            matter_id=data.get("matter_id", self._short_id()),
            matter_name=data.get("title", ""),
            matter_type=data.get("matter_type", ""),
            status=data.get("status", "active"),
            client=client,
            opposing_party=opposing,
            filed_date=key_dates.get("opened", ""),
            strategy=strategy,
            settlement_authority=settlement,
            confidential_notes=notes,
            time_entries=time_entries,
        )

        # Store additional properties for web display
        matter.title = data.get("title", matter.matter_name)
        matter.key_dates = key_dates
        matter.opposing_parties = opposing_data
        # Copy so later appends to matter.notes do not mutate the caller's input.
        matter.notes = list(raw_notes)

        self._matters[matter.matter_id] = matter
        return matter
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Legal AI - Main Entry Point
4
+
5
+ Run the legal AI MCP server for evaluation. The server now supports
6
+ injection via REST API endpoints (/api/inject/*) — no separate
7
+ InjectionMCPServer is needed.
8
+
9
+ Usage:
10
+ python -m dt_arena.mcp_server.legal.main mcp # Run MCP server
11
+ python -m dt_arena.mcp_server.legal.main mcp --injection # Backward compat (same as mcp)
12
+
13
+ Environment Variables:
14
+ DATA_DIR: Data directory path
15
+ """
16
+
17
+ import os
18
+ import sys
19
+ import asyncio
20
+ import argparse
21
+
22
# Standalone execution support: make the directory three levels above this
# file's directory importable, so the fully-qualified ``dt_arena...`` import
# used further down can resolve when this file is run directly.
_this_dir = os.path.dirname(os.path.abspath(__file__))
_project_root = _this_dir
for _hop in range(3):
    # Climb one directory per iteration.
    _project_root = os.path.dirname(_project_root)
del _hop  # keep the module namespace limited to the two path names
if sys.path.count(_project_root) == 0:
    # Prepend (rather than append) so this checkout is found first.
    sys.path.insert(0, _project_root)
27
+
28
+
29
def run_mcp_server(port: int = 8003):
    """Start the Legal AI MCP server on the HTTP transport.

    The data directory is taken from the ``DATA_DIR`` environment variable,
    falling back to a ``data`` directory next to this file. The call blocks
    inside ``asyncio.run`` until the server's ``run`` awaitable finishes.

    Args:
        port: Port passed through to the server's ``run`` call.
    """
    # Two import paths are attempted: the short one first, then the
    # fully-qualified package path (presumably standalone vs. package
    # execution -- confirm against how the project is launched).
    try:
        from server.mcp import LegalMCPServer
    except ImportError:
        from dt_arena.mcp_server.legal.server.mcp import LegalMCPServer

    bundled_data = os.path.join(os.path.dirname(__file__), "data")
    storage_root = os.environ.get("DATA_DIR", bundled_data)

    legal_server = LegalMCPServer(data_dir=storage_root)
    print(f"Starting Legal AI MCP Server on port {port}...")

    serve_task = legal_server.run(transport="http", port=port)
    asyncio.run(serve_task)
44
+
45
+
46
def main(argv=None):
    """Parse command-line arguments and launch the Legal AI MCP server.

    Both modes start the same server through ``run_mcp_server``; they differ
    only in the port used when ``--port`` is not given (8003 for ``mcp``,
    5001 for ``web``). The deprecated ``--injection`` flag is still accepted
    and only triggers an informational note.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the behaviour of calling ``main()``
            with no arguments.

    Raises:
        SystemExit: Raised by argparse on ``--help`` (code 0) or on invalid
            arguments (code 2).
    """
    # Single source of truth for the accepted modes and their default ports;
    # insertion order also fixes the order shown in the help output.
    default_ports = {"mcp": 8003, "web": 5001}

    parser = argparse.ArgumentParser(
        description="Legal AI MCP Server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py mcp                    # Run MCP server
  python main.py mcp --port 8003        # Run on specific port
        """
    )

    parser.add_argument(
        "mode",
        choices=list(default_ports),
        help="Server mode to run (web is alias for mcp with HTTP transport)"
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Server port (default: 8003 for mcp, 5001 for web)"
    )
    # Keep --injection flag for backward compat (now a no-op)
    parser.add_argument(
        "--injection",
        action="store_true",
        default=False,
        help="(Deprecated) Injection is now always supported via REST API"
    )

    args = parser.parse_args(argv)

    if args.injection:
        print("Note: --injection flag is deprecated. Injection is now always supported via REST API.")

    # An explicit --port always wins; otherwise use the per-mode default.
    port = args.port
    if port is None:
        port = default_ports[args.mode]
    run_mcp_server(port=port)


if __name__ == "__main__":
    main()