decodingtrust-agent-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374)
  1. agent/__init__.py +30 -0
  2. agent/claudesdk/__init__.py +8 -0
  3. agent/claudesdk/example.py +221 -0
  4. agent/claudesdk/src/__init__.py +8 -0
  5. agent/claudesdk/src/agent.py +400 -0
  6. agent/claudesdk/src/mcp_proxy.py +409 -0
  7. agent/claudesdk/src/utils.py +420 -0
  8. agent/googleadk/__init__.py +15 -0
  9. agent/googleadk/example.py +237 -0
  10. agent/googleadk/src/__init__.py +12 -0
  11. agent/googleadk/src/agent.py +401 -0
  12. agent/googleadk/src/mcp_wrapper.py +163 -0
  13. agent/googleadk/src/utils.py +602 -0
  14. agent/langchain/__init__.py +8 -0
  15. agent/langchain/example.py +213 -0
  16. agent/langchain/src/__init__.py +8 -0
  17. agent/langchain/src/agent.py +645 -0
  18. agent/langchain/src/utils.py +433 -0
  19. agent/openaisdk/__init__.py +17 -0
  20. agent/openaisdk/example.py +228 -0
  21. agent/openaisdk/src/__init__.py +12 -0
  22. agent/openaisdk/src/agent.py +491 -0
  23. agent/openaisdk/src/agent_wrapper.py +143 -0
  24. agent/openaisdk/src/mcp_wrapper.py +395 -0
  25. agent/openaisdk/src/utils.py +493 -0
  26. agent/openclaw/__init__.py +10 -0
  27. agent/openclaw/example.py +251 -0
  28. agent/openclaw/src/__init__.py +14 -0
  29. agent/openclaw/src/agent.py +930 -0
  30. agent/openclaw/src/helpers/__init__.py +1 -0
  31. agent/openclaw/src/helpers/auth_helpers.py +55 -0
  32. agent/openclaw/src/mcp_proxy.py +564 -0
  33. agent/openclaw/src/plugin_generator.py +231 -0
  34. agent/openclaw/src/utils.py +341 -0
  35. agent/pocketflow/__init__.py +18 -0
  36. agent/pocketflow/example.py +221 -0
  37. agent/pocketflow/prompts/react_agent.py +46 -0
  38. agent/pocketflow/src/__init__.py +6 -0
  39. agent/pocketflow/src/agent.py +507 -0
  40. agent/pocketflow/src/agent_wrapper.py +159 -0
  41. agent/pocketflow/src/async_helper.py +92 -0
  42. agent/pocketflow/src/mcp_react_agent.py +279 -0
  43. agent/pocketflow/src/native_agent.py +74 -0
  44. agent/pocketflow/src/nodes.py +467 -0
  45. benchmark/__init__.py +0 -0
  46. benchmark/browser/benign.jsonl +34 -0
  47. benchmark/browser/direct.jsonl +85 -0
  48. benchmark/browser/indirect.jsonl +82 -0
  49. benchmark/code/benign.jsonl +0 -0
  50. benchmark/code/direct.jsonl +121 -0
  51. benchmark/code/indirect.jsonl +165 -0
  52. benchmark/crm/benign.jsonl +165 -0
  53. benchmark/crm/direct.jsonl +90 -0
  54. benchmark/crm/indirect.jsonl +150 -0
  55. benchmark/customer-service/benign.jsonl +160 -0
  56. benchmark/customer-service/direct.jsonl +100 -0
  57. benchmark/customer-service/indirect.jsonl +101 -0
  58. benchmark/finance/benign.jsonl +0 -0
  59. benchmark/finance/direct.jsonl +200 -0
  60. benchmark/finance/indirect.jsonl +200 -0
  61. benchmark/legal/benign.jsonl +0 -0
  62. benchmark/legal/direct.jsonl +200 -0
  63. benchmark/legal/indirect.jsonl +200 -0
  64. benchmark/macos/benign.jsonl +30 -0
  65. benchmark/macos/direct.jsonl +50 -0
  66. benchmark/macos/indirect.jsonl +50 -0
  67. benchmark/medical/benign.jsonl +642 -0
  68. benchmark/medical/direct.jsonl +229 -0
  69. benchmark/medical/indirect.jsonl +222 -0
  70. benchmark/os-filesystem/benign.jsonl +200 -0
  71. benchmark/os-filesystem/direct.jsonl +200 -0
  72. benchmark/os-filesystem/indirect.jsonl +200 -0
  73. benchmark/research/benign.jsonl +0 -0
  74. benchmark/research/direct.jsonl +119 -0
  75. benchmark/research/indirect.jsonl +125 -0
  76. benchmark/telecom/benign.jsonl +120 -0
  77. benchmark/telecom/direct.jsonl +161 -0
  78. benchmark/telecom/indirect.jsonl +166 -0
  79. benchmark/travel/benign.jsonl +130 -0
  80. benchmark/travel/direct.jsonl +105 -0
  81. benchmark/travel/indirect.jsonl +120 -0
  82. benchmark/windows/benign.jsonl +100 -0
  83. benchmark/windows/direct.jsonl +140 -0
  84. benchmark/windows/indirect.jsonl +107 -0
  85. benchmark/workflow/benign.jsonl +335 -0
  86. benchmark/workflow/direct.jsonl +78 -0
  87. benchmark/workflow/indirect.jsonl +107 -0
  88. cli/__init__.py +5 -0
  89. cli/main.py +182 -0
  90. cli/scaffold.py +334 -0
  91. decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
  92. decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
  93. decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
  94. decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
  95. decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
  96. decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
  97. dt_arena/config/env.yaml +515 -0
  98. dt_arena/config/injection_mcp.yaml +430 -0
  99. dt_arena/config/mcp.yaml +642 -0
  100. dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
  101. dt_arena/envs/arxiv/docker-compose.yml +36 -0
  102. dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
  103. dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
  104. dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
  105. dt_arena/envs/atlassian/docker-compose.yml +72 -0
  106. dt_arena/envs/bigquery/docker-compose.yml +20 -0
  107. dt_arena/envs/booking/docker-compose.yml +59 -0
  108. dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
  109. dt_arena/envs/calendar/docker-compose.yml +42 -0
  110. dt_arena/envs/custom-website/docker-compose.yml +6 -0
  111. dt_arena/envs/customer_service/docker-compose.yml +59 -0
  112. dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
  113. dt_arena/envs/databricks/docker-compose.yml +51 -0
  114. dt_arena/envs/ecommerce/docker-compose.yml +6 -0
  115. dt_arena/envs/ers/docker-compose.yml +36 -0
  116. dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
  117. dt_arena/envs/finance/docker-compose.yml +23 -0
  118. dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
  119. dt_arena/envs/github/docker/docker-compose.yml +50 -0
  120. dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
  121. dt_arena/envs/gmail/docker-compose.yml +65 -0
  122. dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
  123. dt_arena/envs/google-form/docker-compose.yml +41 -0
  124. dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
  125. dt_arena/envs/googledocs/docker-compose.yml +78 -0
  126. dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
  127. dt_arena/envs/hospital/docker-compose.yml +27 -0
  128. dt_arena/envs/legal/docker-compose.yml +22 -0
  129. dt_arena/envs/linkedin/docker-compose.yml +63 -0
  130. dt_arena/envs/macos/docker-compose.yml +79 -0
  131. dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
  132. dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
  133. dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
  134. dt_arena/envs/paypal/docker-compose.yml +63 -0
  135. dt_arena/envs/research/docker-compose-hub.yml +13 -0
  136. dt_arena/envs/research/docker-compose.yml +24 -0
  137. dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
  138. dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
  139. dt_arena/envs/slack/docker-compose-hub.yml +28 -0
  140. dt_arena/envs/slack/docker-compose.yml +41 -0
  141. dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
  142. dt_arena/envs/snowflake/docker-compose.yml +44 -0
  143. dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
  144. dt_arena/envs/telecom/docker-compose.yml +17 -0
  145. dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
  146. dt_arena/envs/telegram/docker-compose.yml +62 -0
  147. dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
  148. dt_arena/envs/terminal/docker-compose.yml +26 -0
  149. dt_arena/envs/travel/docker-compose-hub.yml +19 -0
  150. dt_arena/envs/travel/docker-compose.yml +19 -0
  151. dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
  152. dt_arena/envs/whatsapp/docker-compose.yml +78 -0
  153. dt_arena/envs/windows/docker-compose.yml +71 -0
  154. dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
  155. dt_arena/envs/zoom/docker-compose.yml +40 -0
  156. dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
  157. dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
  158. dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
  159. dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
  160. dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
  161. dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
  162. dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
  163. dt_arena/injection_mcp_server/github/env_injection.py +206 -0
  164. dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
  165. dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
  166. dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
  167. dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
  168. dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
  169. dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
  170. dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
  171. dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
  172. dt_arena/injection_mcp_server/research/env_injection.py +616 -0
  173. dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
  174. dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
  175. dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
  176. dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
  177. dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
  178. dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
  179. dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
  180. dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
  181. dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
  182. dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
  183. dt_arena/mcp_server/atlassian/main.py +1554 -0
  184. dt_arena/mcp_server/atlassian/test_server.py +66 -0
  185. dt_arena/mcp_server/bigquery/main.py +333 -0
  186. dt_arena/mcp_server/booking/main.py +310 -0
  187. dt_arena/mcp_server/browser/main.py +1741 -0
  188. dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
  189. dt_arena/mcp_server/calendar/main.py +792 -0
  190. dt_arena/mcp_server/calendar/test_mcp.py +135 -0
  191. dt_arena/mcp_server/customer_service/main.py +1063 -0
  192. dt_arena/mcp_server/databricks/main.py +566 -0
  193. dt_arena/mcp_server/databricks/probe.py +102 -0
  194. dt_arena/mcp_server/ers/main.py +845 -0
  195. dt_arena/mcp_server/finance/__init__.py +87 -0
  196. dt_arena/mcp_server/finance/core/__init__.py +12 -0
  197. dt_arena/mcp_server/finance/core/data_loader.py +558 -0
  198. dt_arena/mcp_server/finance/core/portfolio.py +565 -0
  199. dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
  200. dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
  201. dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
  202. dt_arena/mcp_server/finance/injection/__init__.py +66 -0
  203. dt_arena/mcp_server/finance/injection/config.py +176 -0
  204. dt_arena/mcp_server/finance/injection/content.py +755 -0
  205. dt_arena/mcp_server/finance/injection/html.py +409 -0
  206. dt_arena/mcp_server/finance/injection/locations.py +167 -0
  207. dt_arena/mcp_server/finance/injection/methods.py +193 -0
  208. dt_arena/mcp_server/finance/injection/presets.py +1023 -0
  209. dt_arena/mcp_server/finance/main.py +361 -0
  210. dt_arena/mcp_server/finance/run_mcp.py +21 -0
  211. dt_arena/mcp_server/finance/run_web.py +26 -0
  212. dt_arena/mcp_server/finance/server/__init__.py +41 -0
  213. dt_arena/mcp_server/finance/server/extractor.py +1453 -0
  214. dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
  215. dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
  216. dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
  217. dt_arena/mcp_server/finance/server/mcp.py +451 -0
  218. dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
  219. dt_arena/mcp_server/finance/server/tools/account.py +88 -0
  220. dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
  221. dt_arena/mcp_server/finance/server/tools/social.py +73 -0
  222. dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
  223. dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
  224. dt_arena/mcp_server/finance/server/web.py +2139 -0
  225. dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
  226. dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
  227. dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
  228. dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
  229. dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
  230. dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
  231. dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
  232. dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
  233. dt_arena/mcp_server/github/main.py +441 -0
  234. dt_arena/mcp_server/gmail/main.py +1004 -0
  235. dt_arena/mcp_server/google_form/main.py +141 -0
  236. dt_arena/mcp_server/googledocs/main.py +458 -0
  237. dt_arena/mcp_server/hospital/mcp_server.py +458 -0
  238. dt_arena/mcp_server/legal/__init__.py +9 -0
  239. dt_arena/mcp_server/legal/core/__init__.py +14 -0
  240. dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
  241. dt_arena/mcp_server/legal/core/data_loader.py +266 -0
  242. dt_arena/mcp_server/legal/core/document_store.py +197 -0
  243. dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
  244. dt_arena/mcp_server/legal/main.py +89 -0
  245. dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
  246. dt_arena/mcp_server/legal/server/__init__.py +14 -0
  247. dt_arena/mcp_server/legal/server/mcp.py +2330 -0
  248. dt_arena/mcp_server/macos/client_test.py +270 -0
  249. dt_arena/mcp_server/macos/mcp_server.py +285 -0
  250. dt_arena/mcp_server/os-filesystem/main.py +1380 -0
  251. dt_arena/mcp_server/paypal/main.py +501 -0
  252. dt_arena/mcp_server/research/main.py +777 -0
  253. dt_arena/mcp_server/salesforce/main.py +2006 -0
  254. dt_arena/mcp_server/slack/main.py +318 -0
  255. dt_arena/mcp_server/snowflake/main.py +612 -0
  256. dt_arena/mcp_server/snowflake/probe.py +183 -0
  257. dt_arena/mcp_server/telecom/mcp_client.py +423 -0
  258. dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
  259. dt_arena/mcp_server/telegram/main.py +338 -0
  260. dt_arena/mcp_server/terminal/main.py +163 -0
  261. dt_arena/mcp_server/travel/client_test.py +16 -0
  262. dt_arena/mcp_server/travel/mcp_server.py +404 -0
  263. dt_arena/mcp_server/whatsapp/main.py +318 -0
  264. dt_arena/mcp_server/windows/client_test.py +270 -0
  265. dt_arena/mcp_server/windows/mcp_server.py +218 -0
  266. dt_arena/mcp_server/zoom/main.py +466 -0
  267. dt_arena/src/__init__.py +0 -0
  268. dt_arena/src/hooks/__init__.py +0 -0
  269. dt_arena/src/hooks/audit_log.py +30 -0
  270. dt_arena/src/hooks/hooks.json +3 -0
  271. dt_arena/src/run_benign.py +142 -0
  272. dt_arena/src/types/__init__.py +0 -0
  273. dt_arena/src/types/agent.py +441 -0
  274. dt_arena/src/types/attacks.py +2 -0
  275. dt_arena/src/types/environment.py +2 -0
  276. dt_arena/src/types/hooks.py +174 -0
  277. dt_arena/src/types/judge.py +52 -0
  278. dt_arena/src/types/red_teaming_trajectory.py +385 -0
  279. dt_arena/src/types/task.py +260 -0
  280. dt_arena/src/types/trajectory.py +315 -0
  281. dt_arena/utils/__init__.py +1 -0
  282. dt_arena/utils/atlassian/__init__.py +27 -0
  283. dt_arena/utils/atlassian/helpers.py +520 -0
  284. dt_arena/utils/bigquery/__init__.py +1 -0
  285. dt_arena/utils/bigquery/helpers.py +246 -0
  286. dt_arena/utils/calendar/__init__.py +1 -0
  287. dt_arena/utils/calendar/helpers.py +87 -0
  288. dt_arena/utils/customer_service/__init__.py +17 -0
  289. dt_arena/utils/customer_service/cs_env_client.py +940 -0
  290. dt_arena/utils/customer_service/helpers.py +339 -0
  291. dt_arena/utils/customer_service/judges/__init__.py +20 -0
  292. dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
  293. dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
  294. dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
  295. dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
  296. dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
  297. dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
  298. dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
  299. dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
  300. dt_arena/utils/customer_service/judges/text_utils.py +21 -0
  301. dt_arena/utils/databricks/__init__.py +2 -0
  302. dt_arena/utils/databricks/helpers.py +210 -0
  303. dt_arena/utils/finance/__init__.py +0 -0
  304. dt_arena/utils/finance/helpers.py +263 -0
  305. dt_arena/utils/github/__init__.py +1 -0
  306. dt_arena/utils/github/helpers.py +249 -0
  307. dt_arena/utils/gmail/__init__.py +1 -0
  308. dt_arena/utils/gmail/helpers.py +344 -0
  309. dt_arena/utils/google_form/__init__.py +2 -0
  310. dt_arena/utils/google_form/helpers.py +133 -0
  311. dt_arena/utils/legal/__init__.py +0 -0
  312. dt_arena/utils/legal/helpers.py +228 -0
  313. dt_arena/utils/macos/__init__.py +0 -0
  314. dt_arena/utils/macos/env_setup.py +215 -0
  315. dt_arena/utils/macos/helpers.py +61 -0
  316. dt_arena/utils/os_filesystem/__init__.py +1 -0
  317. dt_arena/utils/os_filesystem/helpers.py +366 -0
  318. dt_arena/utils/paypal/__init__.py +1 -0
  319. dt_arena/utils/paypal/helpers.py +178 -0
  320. dt_arena/utils/port_allocator.py +266 -0
  321. dt_arena/utils/research/__init__.py +0 -0
  322. dt_arena/utils/research/helpers.py +251 -0
  323. dt_arena/utils/salesforce/__init__.py +1 -0
  324. dt_arena/utils/salesforce/helpers.py +719 -0
  325. dt_arena/utils/slack/__init__.py +1 -0
  326. dt_arena/utils/slack/helpers.py +176 -0
  327. dt_arena/utils/snowflake/__init__.py +1 -0
  328. dt_arena/utils/snowflake/helpers.py +166 -0
  329. dt_arena/utils/telecom/__init__.py +1 -0
  330. dt_arena/utils/telecom/helpers.py +760 -0
  331. dt_arena/utils/telegram/__init__.py +0 -0
  332. dt_arena/utils/telegram/helpers.py +174 -0
  333. dt_arena/utils/terminal/__init__.py +0 -0
  334. dt_arena/utils/terminal/helpers.py +20 -0
  335. dt_arena/utils/travel/__init__.py +0 -0
  336. dt_arena/utils/travel/env_client.py +537 -0
  337. dt_arena/utils/travel/llm_judge.py +137 -0
  338. dt_arena/utils/travel/prompts.py +64 -0
  339. dt_arena/utils/utils/__init__.py +122 -0
  340. dt_arena/utils/whatsapp/__init__.py +0 -0
  341. dt_arena/utils/whatsapp/helpers.py +226 -0
  342. dt_arena/utils/windows/__init__.py +0 -0
  343. dt_arena/utils/windows/env_reset.py +224 -0
  344. dt_arena/utils/windows/env_setup.py +280 -0
  345. dt_arena/utils/windows/exfil_helpers.py +170 -0
  346. dt_arena/utils/windows/helpers.py +74 -0
  347. dt_arena/utils/zoom/__init__.py +1 -0
  348. dt_arena/utils/zoom/helpers.py +70 -0
  349. eval/__init__.py +1 -0
  350. eval/evaluation.py +426 -0
  351. eval/task_runner.py +449 -0
  352. utils/__init__.py +148 -0
  353. utils/agent_helpers.py +308 -0
  354. utils/agent_wrapper.py +189 -0
  355. utils/compose_utils.py +135 -0
  356. utils/config.py +77 -0
  357. utils/env_helpers.py +104 -0
  358. utils/eval_stats.py +88 -0
  359. utils/injection_helpers.py +429 -0
  360. utils/injection_mcp_helpers.py +152 -0
  361. utils/judge_helpers.py +181 -0
  362. utils/judge_utils.py +472 -0
  363. utils/llm.py +196 -0
  364. utils/logging.py +45 -0
  365. utils/mcp_helpers.py +232 -0
  366. utils/mcp_manager.py +235 -0
  367. utils/memory_guard.py +18 -0
  368. utils/red_teaming_sandbox.py +476 -0
  369. utils/reset_helpers.py +318 -0
  370. utils/resource_manager.py +370 -0
  371. utils/skill_helpers.py +447 -0
  372. utils/task_executor.py +904 -0
  373. utils/task_helpers.py +270 -0
  374. utils/template_helpers.py +179 -0
@@ -0,0 +1,266 @@
1
+ """
2
+ Legal Data Loader - Load statutes and document templates.
3
+
4
+ Case law, judges, dockets, and disclosures are now served by CourtListenerStore.
5
+ This loader only handles statutes (not in CourtListener) and document templates.
6
+ """
7
+
8
+ import json
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional, Any
11
+ from dataclasses import dataclass, field
12
+
13
+
14
@dataclass
class Statute:
    """A single statute or regulation record.

    The first seven fields are required; the remaining ones default to an
    empty string, an empty list, or ``True`` (``is_valid``).
    """
    id: str
    citation: str
    title: str
    jurisdiction: str
    code: str
    section: str
    text: str
    effective_date: str = ""
    last_amended: str = ""
    related_sections: List[str] = field(default_factory=list)
    annotations: List[str] = field(default_factory=list)
    topics: List[str] = field(default_factory=list)
    is_valid: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Return the exported view of this statute as a plain dict.

        Only the fields named below are exported; ``last_amended``,
        ``annotations``, ``topics`` and ``is_valid`` are not part of the
        result. List values are returned by reference, not copied.
        """
        exported_fields = (
            "id",
            "citation",
            "title",
            "jurisdiction",
            "code",
            "section",
            "text",
            "effective_date",
            "related_sections",
        )
        return {name: getattr(self, name) for name in exported_fields}
43
+
44
+
45
@dataclass
class DocumentTemplate:
    """Skeleton for a legal document that can be filled in later."""
    template_id: str
    name: str
    doc_type: str  # motion, contract, letter, brief, etc.
    jurisdiction: str
    description: str
    sections: List[Dict[str, Any]] = field(default_factory=list)
    variables: List[str] = field(default_factory=list)
    content: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the template's metadata as a plain dict.

        ``doc_type`` is exported under the key ``"type"``; ``content`` is
        not included in the result. List values are returned by reference.
        """
        return dict(
            template_id=self.template_id,
            name=self.name,
            type=self.doc_type,
            jurisdiction=self.jurisdiction,
            description=self.description,
            sections=self.sections,
            variables=self.variables,
        )
67
+
68
+
69
class LegalDataLoader:
    """Load and manage legal data (statutes, templates).

    Case law, judges, dockets, and disclosures are now handled by
    CourtListenerStore. This class only loads statutes and templates.

    Records read from disk are stored as ``Statute`` / ``DocumentTemplate``
    instances. ``add_statute`` / ``add_template`` store the caller's dict
    unchanged, so both stores can hold a mix of dataclass instances and plain
    dicts. Every reader in this class accepts either form.
    """

    # Words ignored by the token-overlap fallback in ``search_statutes``.
    _STOP_WORDS = frozenset({
        'the', 'and', 'or', 'of', 'in', 'for', 'to', 'a', 'an', 'is', 'on',
        'at', 'by', 'with', 'from', 'that', 'this', 'its',
    })

    def __init__(self, data_dir: Optional[str] = None):
        """Create the loader and immediately read all data files.

        Args:
            data_dir: Directory containing ``statutes/`` and ``templates/``.
                When ``None``, ``<this module's parent package>/data`` is used.
        """
        if data_dir is None:
            data_dir = Path(__file__).parent.parent / "data"
        self.data_dir = Path(data_dir)

        # Values are Statute / DocumentTemplate instances (from disk) or raw
        # dicts (from add_statute / add_template).
        self._statutes: Dict[str, Any] = {}
        self._templates: Dict[str, Any] = {}

        self._load_data()

    def _load_data(self):
        """Load all data from JSON files."""
        self._load_statutes()
        self._load_templates()

    @staticmethod
    def _field(record: Any, name: str, default: Any = "") -> Any:
        """Read ``name`` from a dataclass instance or a raw dict record.

        A missing or ``None`` value yields ``default``.
        """
        if isinstance(record, dict):
            value = record.get(name, default)
        else:
            value = getattr(record, name, default)
        return default if value is None else value

    @staticmethod
    def _build_statute(statute_data: Dict[str, Any], fallback_id: str) -> "Statute":
        """Build a ``Statute`` from one JSON record, auto-filling missing fields.

        ``id`` falls back to ``fallback_id``; ``code`` and ``section`` fall
        back to the text before / after the first "§" in the citation
        (e.g. "42 U.S.C." and "1983" from "42 U.S.C. § 1983"). Keys that are
        not ``Statute`` fields are ignored. The input dict is not modified.
        """
        record = dict(statute_data)
        citation = record.get("citation", "")
        record.setdefault("id", fallback_id)
        if "code" not in record or "section" not in record:
            parts = citation.split("§")
            record.setdefault("code", parts[0].strip())
            record.setdefault("section", parts[1].strip() if len(parts) > 1 else "")
        known = Statute.__dataclass_fields__
        return Statute(**{k: v for k, v in record.items() if k in known})

    def _load_statutes(self):
        """Load statute data from every ``*.json`` file under ``statutes/``.

        Each statute is indexed twice: by lower-cased citation and by id.
        Loading is best-effort: an unreadable file or a malformed record is
        reported with ``print`` and skipped, without aborting the rest.
        """
        statutes_dir = self.data_dir / "statutes"
        if not statutes_dir.exists():
            return

        for json_file in statutes_dir.glob("*.json"):
            try:
                # Explicit UTF-8: citations contain "§", which the platform
                # default encoding may not decode correctly.
                with open(json_file, 'r', encoding="utf-8") as f:
                    data = json.load(f)
                entries = list(data.get("statutes", []))
            except Exception as e:
                print(f"Error loading {json_file}: {e}")
                continue

            for i, statute_data in enumerate(entries):
                try:
                    statute = self._build_statute(
                        statute_data, f"statute_{json_file.stem}_{i:03d}"
                    )
                    citation_key = statute.citation.lower()
                except Exception as e:
                    # Skip only the bad record; keep loading the others.
                    print(f"Error loading {json_file}: {e}")
                    continue
                self._statutes[citation_key] = statute
                self._statutes[statute.id] = statute

    def _load_templates(self):
        """Load document templates from ``templates/document_templates.json``.

        Best-effort like ``_load_statutes``: failures are printed and the
        offending file or record is skipped.
        """
        templates_file = self.data_dir / "templates" / "document_templates.json"
        if not templates_file.exists():
            return

        try:
            with open(templates_file, 'r', encoding="utf-8") as f:
                data = json.load(f)
            entries = list(data.get("templates", []))
        except Exception as e:
            print(f"Error loading templates: {e}")
            return

        known = DocumentTemplate.__dataclass_fields__
        for template_data in entries:
            try:
                template = DocumentTemplate(
                    **{k: v for k, v in template_data.items() if k in known}
                )
            except Exception as e:
                print(f"Error loading templates: {e}")
                continue
            self._templates[template.template_id] = template

    @staticmethod
    def _normalize_citation(text: str) -> str:
        """Normalize citation text for flexible matching.

        Lower-cases the text, removes dots, spaces and "§", and turns an
        en dash into a hyphen, so "C.F.R." matches "CFR" and "U.S.C."
        matches "USC".
        """
        return text.lower().replace(".", "").replace(" ", "").replace("§", "").replace("–", "-")

    def search_statutes(
        self,
        query: str,
        jurisdiction: str = "all",
        limit: int = 20
    ) -> "List[Statute]":
        """Search for statutes matching the query.

        Matching strategy (in priority order):
        1. Exact substring (score 100): full query found in
           citation/title/text/topics.
        2. Normalized citation (score 90): match after stripping
           dots/spaces/§.
        3. Embedded citation (score 85): a U.S.C./C.F.R.-style citation
           extracted from the query matches the statute's citation.
        4. Token overlap (score up to 50): share of meaningful query words
           found in the statute's fields; needs at least 40% to count.

        Args:
            query: Free text or citation; surrounding quotes are stripped.
            jurisdiction: Case-insensitive jurisdiction filter. ``"all"``,
                ``""`` or ``None`` disables the filter.
            limit: Maximum number of results.

        Returns:
            Matching records, best score first. Entries added through
            ``add_statute`` are returned as the raw dicts that were stored.
        """
        import re  # local import: ``re`` is not in this module's import block

        # Clean query: strip surrounding quotes agents often add
        query_clean = query.strip().strip('"').strip("'")
        query_lower = query_clean.lower()
        query_norm = self._normalize_citation(query_clean)

        # Query-only work, done once instead of once per statute.
        # e.g. "asylum 8 U.S.C. 1158 particular social group" -> "8 U.S.C. 1158"
        cite_norms = [
            self._normalize_citation(cp)
            for cp in re.findall(
                r'\d+\s*(?:U\.?S\.?C\.?|C\.?F\.?R\.?)\s*(?:§\s*|Part\s*)?[\d()\w.-]+',
                query_clean, re.IGNORECASE
            )
        ]
        tokens = [
            t for t in query_lower.split()
            if len(t) > 2 and t not in self._STOP_WORDS
        ]
        wanted = (jurisdiction or "all").lower()

        scored: Dict[Any, float] = {}
        seen_statutes: Dict[Any, Any] = {}

        for statute in self._statutes.values():
            if wanted != "all" and str(self._field(statute, "jurisdiction")).lower() != wanted:
                continue

            # Every statute is indexed under two keys; score it only once.
            sid = self._field(statute, "id")
            if sid in seen_statutes:
                continue
            seen_statutes[sid] = statute

            title_lower = str(self._field(statute, "title")).lower()
            text_lower = str(self._field(statute, "text")).lower()
            citation = str(self._field(statute, "citation"))
            citation_lower = citation.lower()
            citation_norm = self._normalize_citation(citation)
            topics = self._field(statute, "topics", None) or []
            if isinstance(topics, str):
                topics = [topics]
            topics_lower = " ".join(str(t).lower() for t in topics)

            score = 0.0
            if (query_lower in title_lower or query_lower in text_lower or
                    query_lower in citation_lower or query_lower in topics_lower):
                # Priority 1: exact substring match (full query in a field)
                score = 100.0
            elif (query_norm and citation_norm and
                    (query_norm in citation_norm or citation_norm in query_norm)):
                # Priority 2: normalized citation match. Both sides must be
                # non-empty: "" is a substring of everything, so a record
                # without a citation would otherwise match every query.
                score = 90.0
            elif citation_norm and any(
                cp in citation_norm or citation_norm in cp for cp in cite_norms
            ):
                # Priority 3: citation-like pattern embedded in the query
                score = 85.0
            elif tokens:
                # Priority 4: token overlap scoring
                all_text = f"{citation_lower} {title_lower} {text_lower} {topics_lower}"
                matched = sum(1 for t in tokens if t in all_text)
                ratio = matched / len(tokens)
                # Only count if at least 40% of tokens match (avoid noise)
                if ratio >= 0.4:
                    score = ratio * 50.0

            if score > 0:
                scored[sid] = score

        # Sort by score descending (stable for ties), return top results.
        ranked = sorted(scored.items(), key=lambda x: -x[1])
        return [seen_statutes[sid] for sid, _ in ranked[:limit]]

    def get_statute(self, statute_id: str) -> "Optional[Statute]":
        """Get a statute by ID or citation.

        The key is tried as given first, then lower-cased (citations are
        indexed lower-cased). Returns ``None`` when nothing matches.
        """
        return self._statutes.get(statute_id) or self._statutes.get(statute_id.lower())

    def get_templates(self, doc_type: str = None) -> "List[DocumentTemplate]":
        """Get document templates, optionally filtered by type.

        For dict entries stored by ``add_template`` the type is read from
        the ``"doc_type"`` key, falling back to ``"type"``.
        """
        templates = list(self._templates.values())
        if not doc_type:
            return templates

        def type_of(template: Any) -> Any:
            if isinstance(template, dict):
                return template.get("doc_type", template.get("type"))
            return getattr(template, "doc_type", None)

        return [t for t in templates if type_of(t) == doc_type]

    def get_template(self, template_id: str) -> "Optional[DocumentTemplate]":
        """Get a specific template by ID."""
        return self._templates.get(template_id)

    def add_statute(self, statute_data: Dict[str, Any]):
        """Add a statute from dictionary data.

        The dict is stored as given (not converted to ``Statute``) under its
        lower-cased ``"citation"`` and its ``"id"``; a missing key is
        treated as ``""``.
        """
        statute_id = statute_data.get("id", "")
        citation = statute_data.get("citation", "")

        self._statutes[citation.lower()] = statute_data
        self._statutes[statute_id] = statute_data

    def add_template(self, template_data: Dict[str, Any]):
        """Add a template from dictionary data.

        The dict is stored as given under its ``"id"``, falling back to
        ``"template_id"`` and then ``""``.
        """
        template_id = template_data.get("id") or template_data.get("template_id", "")
        self._templates[template_id] = template_data

    @property
    def templates(self) -> List[Dict[str, Any]]:
        """Get all templates as a list of dicts, one per distinct template id.

        Dict entries are returned as stored; ``DocumentTemplate`` entries
        are converted with ``to_dict()``.
        """
        seen = set()
        result = []
        for key, template in self._templates.items():
            if isinstance(template, dict):
                template_id = template.get("id", key)
            else:
                template_id = getattr(template, 'template_id', key)
            if template_id in seen:
                continue
            seen.add(template_id)
            result.append(template if isinstance(template, dict) else template.to_dict())
        return result
@@ -0,0 +1,197 @@
1
+ """
2
+ Document Store - Global in-memory storage for documents, drafts, and injected content.
3
+ Docker containers provide per-task isolation, so no session management is needed.
4
+ """
5
+
6
+ import uuid
7
+ from typing import Dict, List, Optional, Any
8
+ from datetime import datetime
9
+ from dataclasses import dataclass, field
10
+
11
+
12
@dataclass
class ConversationMessage:
    """One entry of the conversation history."""

    id: str
    role: str  # user, assistant
    content: str
    timestamp: str
    matter_id: Optional[str] = None
    citations: List[str] = field(default_factory=list)
    tool_calls: List[Dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the message as a plain dictionary keyed by field name.

        List-valued fields are handed out as-is, not copied.
        """
        exported = (
            "id",
            "role",
            "content",
            "timestamp",
            "matter_id",
            "citations",
            "tool_calls",
        )
        return {name: getattr(self, name) for name in exported}
33
+
34
+
35
@dataclass
class UploadedDocument:
    """Record describing a single uploaded document."""

    document_id: str
    filename: str
    doc_type: str
    upload_time: str
    matter_id: Optional[str] = None
    content: str = ""
    summary: str = ""
    extracted_data: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a metadata dictionary for the document.

        The content itself and ``extracted_data`` are not included; only the
        content length is reported. ``doc_type`` is exported under "type".
        """
        summary_text = self.summary or ""
        # An empty or missing content counts as length 0.
        content_size = len(self.content or "")
        return dict(
            document_id=self.document_id,
            filename=self.filename,
            type=self.doc_type,
            upload_time=self.upload_time,
            matter_id=self.matter_id,
            has_summary=bool(self.summary),
            summary=summary_text,
            content_length=content_size,
        )
58
+
59
+
60
@dataclass
class Draft:
    """A drafted document together with its bookkeeping fields."""

    draft_id: str
    template_id: str
    matter_id: str
    doc_type: str
    title: str
    content: str
    created_at: str
    updated_at: str
    status: str = "draft"  # draft, reviewed, finalized

    def to_dict(self) -> Dict[str, Any]:
        """Return the draft as a plain dictionary.

        Every field is exported under its own name except ``doc_type``,
        which appears under the key "type".
        """
        # (output key, attribute name) pairs, in output order.
        layout = (
            ("draft_id", "draft_id"),
            ("template_id", "template_id"),
            ("matter_id", "matter_id"),
            ("type", "doc_type"),
            ("title", "title"),
            ("content", "content"),
            ("created_at", "created_at"),
            ("updated_at", "updated_at"),
            ("status", "status"),
        )
        return {key: getattr(self, attr) for key, attr in layout}
85
+
86
+
87
class DocumentStore:
    """Single in-memory container for uploaded documents, drafts, the
    conversation log, research history and injected content.

    No session handling is implemented here; per the module's design note,
    each task runs in its own Docker container, which keeps state separate
    between tasks.
    """

    def __init__(self):
        self.documents: Dict[str, UploadedDocument] = dict()
        self.drafts: Dict[str, Draft] = dict()
        self.conversation: List[ConversationMessage] = list()
        self.research_history: List[Dict[str, Any]] = list()
        self.injected_cases: Dict[str, Any] = dict()
        self.injected_suggestions: List[Dict[str, Any]] = list()
        self.current_matter_id: Optional[str] = None

    # Internal helpers

    @staticmethod
    def _short_id() -> str:
        """Return the first eight hex characters of a fresh random UUID."""
        return uuid.uuid4().hex[:8]

    @staticmethod
    def _timestamp() -> str:
        """Return the current local time as an ISO-8601 string."""
        return datetime.now().isoformat()

    # Conversation management

    def add_message(self, role: str, content: str,
                    matter_id: str = None, citations: List[str] = None,
                    tool_calls: List[Dict] = None) -> ConversationMessage:
        """Append a new message to the conversation and return it.

        Falsy ``citations`` / ``tool_calls`` are replaced by a new empty list.
        """
        entry = ConversationMessage(
            id=self._short_id(),
            role=role,
            content=content,
            timestamp=self._timestamp(),
            matter_id=matter_id,
            citations=citations if citations else [],
            tool_calls=tool_calls if tool_calls else [],
        )
        self.conversation.append(entry)
        return entry

    def get_conversation(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Return the last ``limit`` messages as dictionaries.

        A falsy ``limit`` (``0`` or ``None``) returns the whole conversation.
        """
        if limit:
            window = self.conversation[-limit:]
        else:
            window = self.conversation
        return [entry.to_dict() for entry in window]

    # Document management

    def add_document(self, filename: str, doc_type: str, content: str,
                     matter_id: str = None) -> UploadedDocument:
        """Create an uploaded-document record, store it by id and return it."""
        record = UploadedDocument(
            document_id=self._short_id(),
            filename=filename,
            doc_type=doc_type,
            upload_time=self._timestamp(),
            matter_id=matter_id,
            content=content,
        )
        self.documents[record.document_id] = record
        return record

    def get_document(self, document_id: str) -> Optional[UploadedDocument]:
        """Return the document stored under ``document_id``, or ``None``."""
        return self.documents.get(document_id)

    def list_documents(self, matter_id: str = None) -> List[Dict[str, Any]]:
        """Return document dictionaries, limited to ``matter_id`` when truthy."""
        return [
            record.to_dict()
            for record in self.documents.values()
            if not matter_id or record.matter_id == matter_id
        ]

    # Draft management

    def add_draft(self, template_id: str, matter_id: str, doc_type: str,
                  title: str, content: str) -> Draft:
        """Create a draft, store it by id and return it.

        ``created_at`` and ``updated_at`` start out as the same timestamp.
        """
        stamp = self._timestamp()
        new_draft = Draft(
            draft_id=self._short_id(),
            template_id=template_id,
            matter_id=matter_id,
            doc_type=doc_type,
            title=title,
            content=content,
            created_at=stamp,
            updated_at=stamp,
        )
        self.drafts[new_draft.draft_id] = new_draft
        return new_draft

    def get_draft(self, draft_id: str) -> Optional[Draft]:
        """Return the draft stored under ``draft_id``, or ``None``."""
        return self.drafts.get(draft_id)

    def update_draft(self, draft_id: str, content: str = None,
                     status: str = None) -> Optional[Draft]:
        """Update a draft's content and/or status and refresh ``updated_at``.

        Arguments left as ``None`` are not applied. Returns the draft, or
        ``None`` when no draft is stored under ``draft_id``.
        """
        target = self.drafts.get(draft_id)
        if not target:
            return target
        if content is not None:
            target.content = content
        if status is not None:
            target.status = status
        # The timestamp is refreshed even when neither field was supplied.
        target.updated_at = self._timestamp()
        return target

    def list_drafts(self, matter_id: str = None) -> List[Dict[str, Any]]:
        """Return draft dictionaries, limited to ``matter_id`` when truthy."""
        return [
            item.to_dict()
            for item in self.drafts.values()
            if not matter_id or item.matter_id == matter_id
        ]

    # Injection management

    def inject_case(self, case_id: str, case_data: Dict[str, Any]):
        """Store ``case_data`` under ``case_id``, replacing any previous entry."""
        self.injected_cases[case_id] = case_data

    def get_injected_cases(self) -> Dict[str, Any]:
        """Return the internal mapping of injected cases (not a copy)."""
        return self.injected_cases

    # Reset

    def clear(self):
        """Clear all state."""
        containers = (
            self.documents,
            self.drafts,
            self.conversation,
            self.research_history,
            self.injected_cases,
            self.injected_suggestions,
        )
        for container in containers:
            container.clear()
        self.current_matter_id = None