decodingtrust-agent-sdk 0.2.9__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (401)
  1. {decodingtrust_agent_sdk-0.2.9/decodingtrust_agent_sdk.egg-info → decodingtrust_agent_sdk-0.2.10}/PKG-INFO +69 -15
  2. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/README.md +61 -14
  3. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/example.py +1 -1
  4. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/hermes_runner.py +1 -1
  5. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/example.py +2 -2
  6. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/example.py +2 -2
  7. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/example.py +2 -2
  8. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/strands/example.py +1 -1
  9. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/cli/scaffold.py +2 -2
  10. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10/decodingtrust_agent_sdk.egg-info}/PKG-INFO +69 -15
  11. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/requires.txt +8 -0
  12. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/mcp.yaml +7 -1
  13. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/browser/main.py +16 -3
  14. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/agent.py +2 -2
  15. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/eval/evaluation.py +31 -11
  16. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/eval/task_runner.py +2 -2
  17. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/pyproject.toml +19 -1
  18. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/__init__.py +6 -0
  19. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/agent_helpers.py +47 -63
  20. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/config.py +57 -7
  21. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/judge_helpers.py +28 -11
  22. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/LICENSE +0 -0
  23. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/MANIFEST.in +0 -0
  24. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/__init__.py +0 -0
  25. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/__init__.py +0 -0
  26. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/example.py +0 -0
  27. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/__init__.py +0 -0
  28. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/agent.py +0 -0
  29. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/mcp_proxy.py +0 -0
  30. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/utils.py +0 -0
  31. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/__init__.py +0 -0
  32. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/example.py +0 -0
  33. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/__init__.py +0 -0
  34. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/agent.py +0 -0
  35. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/mcp_wrapper.py +0 -0
  36. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/utils.py +0 -0
  37. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/__init__.py +0 -0
  38. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/__init__.py +0 -0
  39. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/agent.py +0 -0
  40. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/mcp_proxy.py +0 -0
  41. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/utils.py +0 -0
  42. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/__init__.py +0 -0
  43. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/__init__.py +0 -0
  44. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/agent.py +0 -0
  45. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/utils.py +0 -0
  46. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/__init__.py +0 -0
  47. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/__init__.py +0 -0
  48. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/agent.py +0 -0
  49. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/agent_wrapper.py +0 -0
  50. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/mcp_wrapper.py +0 -0
  51. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/utils.py +0 -0
  52. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/__init__.py +0 -0
  53. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/example.py +0 -0
  54. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/__init__.py +0 -0
  55. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/agent.py +0 -0
  56. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/helpers/__init__.py +0 -0
  57. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/helpers/auth_helpers.py +0 -0
  58. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/mcp_proxy.py +0 -0
  59. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/plugin_generator.py +0 -0
  60. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/utils.py +0 -0
  61. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/__init__.py +0 -0
  62. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/prompts/react_agent.py +0 -0
  63. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/__init__.py +0 -0
  64. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/agent.py +0 -0
  65. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/agent_wrapper.py +0 -0
  66. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/async_helper.py +0 -0
  67. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/mcp_react_agent.py +0 -0
  68. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/native_agent.py +0 -0
  69. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/nodes.py +0 -0
  70. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/strands/__init__.py +0 -0
  71. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/__init__.py +0 -0
  72. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/agent.py +0 -0
  73. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/utils.py +0 -0
  74. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/__init__.py +0 -0
  75. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/benign.jsonl +0 -0
  76. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/direct.jsonl +0 -0
  77. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/indirect.jsonl +0 -0
  78. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/benign.jsonl +0 -0
  79. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/direct.jsonl +0 -0
  80. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/indirect.jsonl +0 -0
  81. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/benign.jsonl +0 -0
  82. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/direct.jsonl +0 -0
  83. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/indirect.jsonl +0 -0
  84. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/benign.jsonl +0 -0
  85. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/direct.jsonl +0 -0
  86. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/indirect.jsonl +0 -0
  87. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/benign.jsonl +0 -0
  88. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/direct.jsonl +0 -0
  89. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/indirect.jsonl +0 -0
  90. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/benign.jsonl +0 -0
  91. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/direct.jsonl +0 -0
  92. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/indirect.jsonl +0 -0
  93. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/benign.jsonl +0 -0
  94. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/direct.jsonl +0 -0
  95. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/indirect.jsonl +0 -0
  96. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/benign.jsonl +0 -0
  97. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/direct.jsonl +0 -0
  98. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/indirect.jsonl +0 -0
  99. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/benign.jsonl +0 -0
  100. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/direct.jsonl +0 -0
  101. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/indirect.jsonl +0 -0
  102. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/benign.jsonl +0 -0
  103. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/direct.jsonl +0 -0
  104. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/indirect.jsonl +0 -0
  105. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/benign.jsonl +0 -0
  106. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/direct.jsonl +0 -0
  107. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/indirect.jsonl +0 -0
  108. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/benign.jsonl +0 -0
  109. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/direct.jsonl +0 -0
  110. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/indirect.jsonl +0 -0
  111. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/benign.jsonl +0 -0
  112. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/direct.jsonl +0 -0
  113. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/indirect.jsonl +0 -0
  114. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/benign.jsonl +0 -0
  115. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/direct.jsonl +0 -0
  116. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/indirect.jsonl +0 -0
  117. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/cli/__init__.py +0 -0
  118. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/cli/main.py +0 -0
  119. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/SOURCES.txt +0 -0
  120. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/dependency_links.txt +0 -0
  121. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/entry_points.txt +0 -0
  122. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/top_level.txt +0 -0
  123. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/a2a_agents/generic/__init__.py +0 -0
  124. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/a2a_agents/generic/agent.py +0 -0
  125. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/env.yaml +0 -0
  126. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/injection_mcp.yaml +0 -0
  127. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/arxiv/docker-compose-hub.yml +0 -0
  128. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/arxiv/docker-compose.yml +0 -0
  129. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/atlassian/docker-compose-hub.yml +0 -0
  130. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/atlassian/docker-compose.yml +0 -0
  131. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/bigquery/docker-compose.yml +0 -0
  132. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/booking/docker-compose.yml +0 -0
  133. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/calendar/docker-compose-hub.yml +0 -0
  134. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/calendar/docker-compose.yml +0 -0
  135. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/custom-website/docker-compose.yml +0 -0
  136. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/customer_service/docker-compose.yml +0 -0
  137. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/databricks/docker-compose-hub.yml +0 -0
  138. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/databricks/docker-compose.yml +0 -0
  139. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/ecommerce/docker-compose.yml +0 -0
  140. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/ers/docker-compose.yml +0 -0
  141. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/finance/docker-compose.yml +0 -0
  142. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/gmail/docker-compose-hub.yml +0 -0
  143. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/gmail/docker-compose.yml +0 -0
  144. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/google-form/docker-compose-hub.yml +0 -0
  145. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/google-form/docker-compose.yml +0 -0
  146. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/googledocs/docker-compose-hub.yml +0 -0
  147. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/googledocs/docker-compose.yml +0 -0
  148. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/hospital/docker-compose-hub.yml +0 -0
  149. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/hospital/docker-compose.yml +0 -0
  150. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/legal/docker-compose.yml +0 -0
  151. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/linkedin/docker-compose.yml +0 -0
  152. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/macos/docker-compose.yml +0 -0
  153. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/os-filesystem/docker-compose-hub.yml +0 -0
  154. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/os-filesystem/docker-compose.yml +0 -0
  155. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/paypal/docker-compose-hub.yml +0 -0
  156. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/paypal/docker-compose.yml +0 -0
  157. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/research/docker-compose-hub.yml +0 -0
  158. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/research/docker-compose.yml +0 -0
  159. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +0 -0
  160. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/salesforce_crm/docker-compose.yaml +0 -0
  161. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/slack/docker-compose-hub.yml +0 -0
  162. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/slack/docker-compose.yml +0 -0
  163. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/snowflake/docker-compose-hub.yml +0 -0
  164. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/snowflake/docker-compose.yml +0 -0
  165. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telecom/docker-compose-hub.yml +0 -0
  166. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telecom/docker-compose.yml +0 -0
  167. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telegram/docker-compose-hub.yml +0 -0
  168. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telegram/docker-compose.yml +0 -0
  169. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/terminal/docker-compose-hub.yml +0 -0
  170. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/terminal/docker-compose.yml +0 -0
  171. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/travel/docker-compose-hub.yml +0 -0
  172. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/travel/docker-compose.yml +0 -0
  173. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/whatsapp/docker-compose-hub.yml +0 -0
  174. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/whatsapp/docker-compose.yml +0 -0
  175. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/dns_listener.py +0 -0
  176. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/docker-compose.yml +0 -0
  177. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/exfil_listener.py +0 -0
  178. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/ftp_listener.py +0 -0
  179. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/reset_server.py +0 -0
  180. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/validate.py +0 -0
  181. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/zoom/docker-compose-hub.yml +0 -0
  182. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/zoom/docker-compose.yml +0 -0
  183. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/atlassian/env_injection.py +0 -0
  184. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/calendar/env_injection.py +0 -0
  185. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/custom_website/env_injection.py +0 -0
  186. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/customer_service/env_injection.py +0 -0
  187. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/databricks/env_injection.py +0 -0
  188. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/ecommerce/env_injection.py +0 -0
  189. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/finance/env_injection.py +0 -0
  190. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/github/env_injection.py +0 -0
  191. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/gmail/env_injection.py +0 -0
  192. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/google_form/env_injection.py +0 -0
  193. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/googledocs/env_injection.py +0 -0
  194. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/hospital/env_injection.py +0 -0
  195. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/legal/env_injection.py +0 -0
  196. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/macos/env_injection.py +0 -0
  197. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/os-filesystem/env_injection.py +0 -0
  198. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/paypal/env_injection.py +0 -0
  199. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/research/env_injection.py +0 -0
  200. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/salesforce/env_injection.py +0 -0
  201. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/slack/env_injection.py +0 -0
  202. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/snowflake/env_injection.py +0 -0
  203. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/telecom/env_injection.py +0 -0
  204. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/telegram/env_injection.py +0 -0
  205. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/terminal/env_injection.py +0 -0
  206. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/travel/env_injection.py +0 -0
  207. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/whatsapp/env_injection.py +0 -0
  208. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/windows/env_injection.py +0 -0
  209. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/zoom/env_injection.py +0 -0
  210. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/atlassian/main.py +0 -0
  211. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/atlassian/test_server.py +0 -0
  212. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/bigquery/main.py +0 -0
  213. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/booking/main.py +0 -0
  214. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/example_multi_user.py +0 -0
  215. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/main.py +0 -0
  216. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/test_mcp.py +0 -0
  217. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/customer_service/main.py +0 -0
  218. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/databricks/main.py +0 -0
  219. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/databricks/probe.py +0 -0
  220. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/ers/main.py +0 -0
  221. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/__init__.py +0 -0
  222. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/__init__.py +0 -0
  223. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/data_loader.py +0 -0
  224. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/portfolio.py +0 -0
  225. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/__init__.py +0 -0
  226. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/evaluator.py +0 -0
  227. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/logger.py +0 -0
  228. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/__init__.py +0 -0
  229. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/config.py +0 -0
  230. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/content.py +0 -0
  231. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/html.py +0 -0
  232. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/locations.py +0 -0
  233. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/methods.py +0 -0
  234. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/presets.py +0 -0
  235. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/main.py +0 -0
  236. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/run_mcp.py +0 -0
  237. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/run_web.py +0 -0
  238. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/__init__.py +0 -0
  239. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor.py +0 -0
  240. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor_minimal.py +0 -0
  241. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor_simple.py +0 -0
  242. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/injection_mcp.py +0 -0
  243. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/mcp.py +0 -0
  244. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/__init__.py +0 -0
  245. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/account.py +0 -0
  246. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/browsing.py +0 -0
  247. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/social.py +0 -0
  248. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/trading.py +0 -0
  249. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/utility.py +0 -0
  250. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/web.py +0 -0
  251. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +0 -0
  252. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +0 -0
  253. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +0 -0
  254. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +0 -0
  255. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +0 -0
  256. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +0 -0
  257. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +0 -0
  258. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/test_mcp_tools.py +0 -0
  259. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/github/main.py +0 -0
  260. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/gmail/main.py +0 -0
  261. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/google_form/main.py +0 -0
  262. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/googledocs/main.py +0 -0
  263. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/hospital/mcp_server.py +0 -0
  264. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/__init__.py +0 -0
  265. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/__init__.py +0 -0
  266. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/courtlistener_store.py +0 -0
  267. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/data_loader.py +0 -0
  268. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/document_store.py +0 -0
  269. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/matter_manager.py +0 -0
  270. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/main.py +0 -0
  271. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/scripts/collect_data.py +0 -0
  272. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/server/__init__.py +0 -0
  273. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/server/mcp.py +0 -0
  274. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/macos/client_test.py +0 -0
  275. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/macos/mcp_server.py +0 -0
  276. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/os-filesystem/main.py +0 -0
  277. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/paypal/main.py +0 -0
  278. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/research/main.py +0 -0
  279. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/salesforce/main.py +0 -0
  280. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/slack/main.py +0 -0
  281. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/snowflake/main.py +0 -0
  282. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/snowflake/probe.py +0 -0
  283. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telecom/mcp_client.py +0 -0
  284. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telecom/mcp_server.py +0 -0
  285. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telegram/main.py +0 -0
  286. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/terminal/main.py +0 -0
  287. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/travel/client_test.py +0 -0
  288. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/travel/mcp_server.py +0 -0
  289. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/whatsapp/main.py +0 -0
  290. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/windows/client_test.py +0 -0
  291. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/windows/mcp_server.py +0 -0
  292. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/zoom/main.py +0 -0
  293. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/__init__.py +0 -0
  294. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/__init__.py +0 -0
  295. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/audit_log.py +0 -0
  296. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/hooks.json +0 -0
  297. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/run_benign.py +0 -0
  298. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/__init__.py +0 -0
  299. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/attacks.py +0 -0
  300. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/environment.py +0 -0
  301. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/hooks.py +0 -0
  302. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/judge.py +0 -0
  303. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/red_teaming_trajectory.py +0 -0
  304. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/task.py +0 -0
  305. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/trajectory.py +0 -0
  306. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/__init__.py +0 -0
  307. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/atlassian/__init__.py +0 -0
  308. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/atlassian/helpers.py +0 -0
  309. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/bigquery/__init__.py +0 -0
  310. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/bigquery/helpers.py +0 -0
  311. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/calendar/__init__.py +0 -0
  312. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/calendar/helpers.py +0 -0
  313. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/chase/__init__.py +0 -0
  314. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/chase/helpers.py +0 -0
  315. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/__init__.py +0 -0
  316. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/cs_env_client.py +0 -0
  317. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/helpers.py +0 -0
  318. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/__init__.py +0 -0
  319. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/abusive_comms.py +0 -0
  320. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/bulk_destructive.py +0 -0
  321. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/cross_customer.py +0 -0
  322. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/email_exfiltration.py +0 -0
  323. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/financial_fraud.py +0 -0
  324. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/llm_utils.py +0 -0
  325. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/pii_disclosure.py +0 -0
  326. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/policy_misrep.py +0 -0
  327. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/text_utils.py +0 -0
  328. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/databricks/__init__.py +0 -0
  329. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/databricks/helpers.py +0 -0
  330. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/finance/__init__.py +0 -0
  331. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/finance/helpers.py +0 -0
  332. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/github/__init__.py +0 -0
  333. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/github/helpers.py +0 -0
  334. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/gmail/__init__.py +0 -0
  335. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/gmail/helpers.py +0 -0
  336. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/google_form/__init__.py +0 -0
  337. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/google_form/helpers.py +0 -0
  338. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/legal/__init__.py +0 -0
  339. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/legal/helpers.py +0 -0
  340. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/__init__.py +0 -0
  341. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/env_setup.py +0 -0
  342. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/helpers.py +0 -0
  343. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/os_filesystem/__init__.py +0 -0
  344. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/os_filesystem/helpers.py +0 -0
  345. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/paypal/__init__.py +0 -0
  346. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/paypal/helpers.py +0 -0
  347. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/port_allocator.py +0 -0
  348. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/research/__init__.py +0 -0
  349. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/research/helpers.py +0 -0
  350. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/robinhood/__init__.py +0 -0
  351. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/robinhood/helpers.py +0 -0
  352. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/salesforce/__init__.py +0 -0
  353. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/salesforce/helpers.py +0 -0
  354. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/slack/__init__.py +0 -0
  355. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/slack/helpers.py +0 -0
  356. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/snowflake/__init__.py +0 -0
  357. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/snowflake/helpers.py +0 -0
  358. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telecom/__init__.py +0 -0
  359. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telecom/helpers.py +0 -0
  360. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telegram/__init__.py +0 -0
  361. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telegram/helpers.py +0 -0
  362. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/terminal/__init__.py +0 -0
  363. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/terminal/helpers.py +0 -0
  364. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/__init__.py +0 -0
  365. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/env_client.py +0 -0
  366. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/llm_judge.py +0 -0
  367. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/prompts.py +0 -0
  368. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/utils/__init__.py +0 -0
  369. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/whatsapp/__init__.py +0 -0
  370. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/whatsapp/helpers.py +0 -0
  371. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/__init__.py +0 -0
  372. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/env_reset.py +0 -0
  373. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/env_setup.py +0 -0
  374. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/exfil_helpers.py +0 -0
  375. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/helpers.py +0 -0
  376. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/zoom/__init__.py +0 -0
  377. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/zoom/helpers.py +0 -0
  378. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/eval/__init__.py +0 -0
  379. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/eval/_ui.py +0 -0
  380. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/setup.cfg +0 -0
  381. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/a2a_helpers.py +0 -0
  382. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/agent_wrapper.py +0 -0
  383. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/compose_utils.py +0 -0
  384. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/env_helpers.py +0 -0
  385. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/eval_stats.py +0 -0
  386. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/injection_a2a_helpers.py +0 -0
  387. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/injection_helpers.py +0 -0
  388. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/injection_mcp_helpers.py +0 -0
  389. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/judge_utils.py +0 -0
  390. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/llm.py +0 -0
  391. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/logging.py +0 -0
  392. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/mcp_helpers.py +0 -0
  393. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/mcp_manager.py +0 -0
  394. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/memory_guard.py +0 -0
  395. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/red_teaming_sandbox.py +0 -0
  396. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/reset_helpers.py +0 -0
  397. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/resource_manager.py +0 -0
  398. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/skill_helpers.py +0 -0
  399. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/task_executor.py +0 -0
  400. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/task_helpers.py +0 -0
  401. {decodingtrust_agent_sdk-0.2.9 → decodingtrust_agent_sdk-0.2.10}/utils/template_helpers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: decodingtrust-agent-sdk
3
- Version: 0.2.9
3
+ Version: 0.2.10
4
4
  Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
5
5
  Author-email: DTap Team <zhaorun@uchicago.edu>
6
6
  License: Apache License
@@ -245,6 +245,8 @@ Requires-Dist: rich>=13.0.0
245
245
  Requires-Dist: pandas>=2.0.0
246
246
  Requires-Dist: psutil>=5.9.0
247
247
  Requires-Dist: huggingface_hub>=0.20.0
248
+ Requires-Dist: playwright>=1.53
249
+ Requires-Dist: pillow>=10.0.0
248
250
  Provides-Extra: openai
249
251
  Requires-Dist: openai>=2.6.1; extra == "openai"
250
252
  Requires-Dist: openai-agents>=0.8.4; extra == "openai"
@@ -260,6 +262,10 @@ Requires-Dist: google-generativeai>=0.3.0; extra == "google"
260
262
  Requires-Dist: google-genai>=1.0.0; extra == "google"
261
263
  Requires-Dist: google-api-core>=2.28.0; extra == "google"
262
264
  Requires-Dist: google-api-python-client>=2.100.0; extra == "google"
265
+ Requires-Dist: google-cloud-monitoring>=2.20.0; extra == "google"
266
+ Requires-Dist: google-cloud-trace>=1.13.0; extra == "google"
267
+ Requires-Dist: opentelemetry-exporter-gcp-trace>=1.7.0; extra == "google"
268
+ Requires-Dist: opentelemetry-exporter-gcp-monitoring>=1.7.0a0; extra == "google"
263
269
  Provides-Extra: strands
264
270
  Requires-Dist: strands-agents>=1.40.0; extra == "strands"
265
271
  Provides-Extra: langchain
@@ -269,6 +275,7 @@ Requires-Dist: langchain-openai>=0.2.0; extra == "langchain"
269
275
  Requires-Dist: langchain-anthropic>=0.2.0; extra == "langchain"
270
276
  Provides-Extra: pocketflow
271
277
  Requires-Dist: pocketflow==0.0.3; extra == "pocketflow"
278
+ Provides-Extra: browser
272
279
  Provides-Extra: all
273
280
  Requires-Dist: decodingtrust-agent-sdk[claude,google,langchain,openai,pocketflow,strands]; extra == "all"
274
281
  Provides-Extra: dev
@@ -340,14 +347,57 @@ We have publicly released the full evaluation results, including the complete re
340
347
 
341
348
  ## Installation
342
349
 
350
+ ### Option A — from PyPI (recommended for users)
351
+
352
+ ```bash
353
+ pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
354
+ # …plus the backend(s) you actually use (see "Agent backends" below):
355
+ pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
356
+ pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
357
+ ```
358
+
359
+ This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
360
+ benchmark tasks with `--domain`:
361
+
362
+ ```bash
363
+ dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
364
+ ```
365
+
366
+ On first run, the per-task dataset is auto-downloaded from HuggingFace — **only for the
367
+ domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
368
+
369
+ ```bash
370
+ export HF_TOKEN=hf_...
371
+ ```
372
+
373
+ ### Option B — from source (for development)
374
+
343
375
  ```bash
344
376
  git clone https://github.com/AI-secure/DecodingTrust-Agent.git
345
377
  cd DecodingTrust-Agent
346
- pip install -r requirements.txt
347
- pip install -e .
378
+ pip install -e ".[openai]" # or [all] for every backend
379
+ # (from a source checkout, `python eval/evaluation.py --task-list benchmark/...` also works)
348
380
  ```
349
381
 
350
- Set the API key for your backbone model (only the providers you actually use are required):
382
+ ### Agent backends (optional extras)
383
+
384
+ Install only the framework you evaluate with:
385
+
386
+ | Extra | Backend (`--agent-type`) |
387
+ |---|---|
388
+ | `openai` | `openaisdk` |
389
+ | `claude` | `claudesdk` |
390
+ | `google` | `googleadk` |
391
+ | `langchain` | `langchain` |
392
+ | `strands` | `strands` |
393
+ | `pocketflow` | `pocketflow` |
394
+ | `all` | every backend above |
395
+
396
+ (The `browser` domain needs no extra — its Playwright deps are part of the core install.)
397
+
398
+ ### Model keys & Docker
399
+
400
+ Set the API key for your backbone model (only the providers you use):
351
401
 
352
402
  ```bash
353
403
  export OPENAI_API_KEY=sk-...
@@ -357,6 +407,10 @@ export GOOGLE_API_KEY=...
357
407
 
358
408
  Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
359
409
 
410
+ > **Browser domain note:** browser tasks send full-page screenshots (large image-token
411
+ > input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
412
+ > provider token-rate limits (HTTP 429), then raise it if your quota allows.
413
+
360
414
  ---
361
415
 
362
416
  ## Quick Start
@@ -367,7 +421,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
367
421
  python eval/evaluation.py \
368
422
  --task-list benchmark/crm/benign.jsonl \
369
423
  --agent-type openaisdk \
370
- --model gpt-4o \
424
+ --model gpt-5.4 \
371
425
  --max-parallel 4
372
426
  ```
373
427
 
@@ -405,7 +459,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
405
459
  python eval/evaluation.py \
406
460
  --task-list benchmark/finance \
407
461
  --agent-type openaisdk \
408
- --model gpt-4o \
462
+ --model gpt-5.4 \
409
463
  --max-parallel 8
410
464
  ```
411
465
 
@@ -415,7 +469,7 @@ python eval/evaluation.py \
415
469
 
416
470
  ```bash
417
471
  # Benign utility only
418
- python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-4o
472
+ python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
419
473
 
420
474
  # Direct prompt injection only
421
475
  python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
@@ -432,7 +486,7 @@ python eval/evaluation.py \
432
486
  --task-type malicious \
433
487
  --threat-model indirect \
434
488
  --risk-category data-exfiltration \
435
- --agent-type openaisdk --model gpt-4o
489
+ --agent-type openaisdk --model gpt-5.4
436
490
  ```
437
491
 
438
492
  ### The entire benchmark
@@ -443,7 +497,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
443
497
  python eval/evaluation.py \
444
498
  --task-list benchmark \
445
499
  --agent-type openaisdk \
446
- --model gpt-4o \
500
+ --model gpt-5.4 \
447
501
  --max-parallel 16 \
448
502
  --skip-existing
449
503
  ```
@@ -478,7 +532,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
478
532
  Run it like any built-in task list:
479
533
 
480
534
  ```bash
481
- python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-4o --max-parallel 4
535
+ python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
482
536
  ```
483
537
 
484
538
  A few practical patterns:
@@ -486,11 +540,11 @@ A few practical patterns:
486
540
  ```bash
487
541
  # Curate from an existing file
488
542
  grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
489
- python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-4o
543
+ python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
490
544
 
491
545
  # Try just one task end-to-end
492
546
  echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
493
- python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-4o
547
+ python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
494
548
  ```
495
549
 
496
550
  For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
@@ -508,7 +562,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
508
562
  | `--risk-category` | `None` | e.g. `data-exfiltration` |
509
563
  | `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
510
564
  | `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `strands`, `pocketflow`, `openclaw` |
511
- | `--model` | `gpt-4o` | Backbone model identifier |
565
+ | `--model` | `gpt-5.4` | Backbone model identifier |
512
566
  | `--temperature` | `None` | Sampling temperature (model default if unset) |
513
567
  | `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
514
568
  | `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
@@ -558,13 +612,13 @@ async def main():
558
612
  native = OpenAIAgent(
559
613
  name="MyAgent",
560
614
  instructions="You are a helpful CRM assistant.",
561
- model="gpt-4o",
615
+ model="gpt-5.4",
562
616
  mcp_servers=[my_custom_server],
563
617
  )
564
618
 
565
619
  # 2. Load the benchmark task config (adds salesforce, gmail, etc.)
566
620
  agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
567
- runtime_cfg = RuntimeConfig(model="gpt-4o", temperature=0.1, max_turns=200,
621
+ runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
568
622
  output_dir="./results")
569
623
 
570
624
  # 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
@@ -59,14 +59,57 @@ We have publicly released the full evaluation results, including the complete re
59
59
 
60
60
  ## Installation
61
61
 
62
+ ### Option A — from PyPI (recommended for users)
63
+
64
+ ```bash
65
+ pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
66
+ # …plus the backend(s) you actually use (see "Agent backends" below):
67
+ pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
68
+ pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
69
+ ```
70
+
71
+ This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
72
+ benchmark tasks with `--domain`:
73
+
74
+ ```bash
75
+ dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
76
+ ```
77
+
78
+ On first run, the per-task dataset is auto-downloaded from HuggingFace — **only for the
79
+ domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
80
+
81
+ ```bash
82
+ export HF_TOKEN=hf_...
83
+ ```
84
+
85
+ ### Option B — from source (for development)
86
+
62
87
  ```bash
63
88
  git clone https://github.com/AI-secure/DecodingTrust-Agent.git
64
89
  cd DecodingTrust-Agent
65
- pip install -r requirements.txt
66
- pip install -e .
90
+ pip install -e ".[openai]" # or [all] for every backend
91
+ # (from a source checkout, `python eval/evaluation.py --task-list benchmark/...` also works)
67
92
  ```
68
93
 
69
- Set the API key for your backbone model (only the providers you actually use are required):
94
+ ### Agent backends (optional extras)
95
+
96
+ Install only the framework you evaluate with:
97
+
98
+ | Extra | Backend (`--agent-type`) |
99
+ |---|---|
100
+ | `openai` | `openaisdk` |
101
+ | `claude` | `claudesdk` |
102
+ | `google` | `googleadk` |
103
+ | `langchain` | `langchain` |
104
+ | `strands` | `strands` |
105
+ | `pocketflow` | `pocketflow` |
106
+ | `all` | every backend above |
107
+
108
+ (The `browser` domain needs no extra — its Playwright deps are part of the core install.)
109
+
110
+ ### Model keys & Docker
111
+
112
+ Set the API key for your backbone model (only the providers you use):
70
113
 
71
114
  ```bash
72
115
  export OPENAI_API_KEY=sk-...
@@ -76,6 +119,10 @@ export GOOGLE_API_KEY=...
76
119
 
77
120
  Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
78
121
 
122
+ > **Browser domain note:** browser tasks send full-page screenshots (large image-token
123
+ > input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
124
+ > provider token-rate limits (HTTP 429), then raise it if your quota allows.
125
+
79
126
  ---
80
127
 
81
128
  ## Quick Start
@@ -86,7 +133,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
86
133
  python eval/evaluation.py \
87
134
  --task-list benchmark/crm/benign.jsonl \
88
135
  --agent-type openaisdk \
89
- --model gpt-4o \
136
+ --model gpt-5.4 \
90
137
  --max-parallel 4
91
138
  ```
92
139
 
@@ -124,7 +171,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
124
171
  python eval/evaluation.py \
125
172
  --task-list benchmark/finance \
126
173
  --agent-type openaisdk \
127
- --model gpt-4o \
174
+ --model gpt-5.4 \
128
175
  --max-parallel 8
129
176
  ```
130
177
 
@@ -134,7 +181,7 @@ python eval/evaluation.py \
134
181
 
135
182
  ```bash
136
183
  # Benign utility only
137
- python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-4o
184
+ python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
138
185
 
139
186
  # Direct prompt injection only
140
187
  python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
@@ -151,7 +198,7 @@ python eval/evaluation.py \
151
198
  --task-type malicious \
152
199
  --threat-model indirect \
153
200
  --risk-category data-exfiltration \
154
- --agent-type openaisdk --model gpt-4o
201
+ --agent-type openaisdk --model gpt-5.4
155
202
  ```
156
203
 
157
204
  ### The entire benchmark
@@ -162,7 +209,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
162
209
  python eval/evaluation.py \
163
210
  --task-list benchmark \
164
211
  --agent-type openaisdk \
165
- --model gpt-4o \
212
+ --model gpt-5.4 \
166
213
  --max-parallel 16 \
167
214
  --skip-existing
168
215
  ```
@@ -197,7 +244,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
197
244
  Run it like any built-in task list:
198
245
 
199
246
  ```bash
200
- python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-4o --max-parallel 4
247
+ python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
201
248
  ```
202
249
 
203
250
  A few practical patterns:
@@ -205,11 +252,11 @@ A few practical patterns:
205
252
  ```bash
206
253
  # Curate from an existing file
207
254
  grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
208
- python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-4o
255
+ python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
209
256
 
210
257
  # Try just one task end-to-end
211
258
  echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
212
- python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-4o
259
+ python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
213
260
  ```
214
261
 
215
262
  For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
@@ -227,7 +274,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
227
274
  | `--risk-category` | `None` | e.g. `data-exfiltration` |
228
275
  | `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
229
276
  | `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `strands`, `pocketflow`, `openclaw` |
230
- | `--model` | `gpt-4o` | Backbone model identifier |
277
+ | `--model` | `gpt-5.4` | Backbone model identifier |
231
278
  | `--temperature` | `None` | Sampling temperature (model default if unset) |
232
279
  | `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
233
280
  | `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
@@ -277,13 +324,13 @@ async def main():
277
324
  native = OpenAIAgent(
278
325
  name="MyAgent",
279
326
  instructions="You are a helpful CRM assistant.",
280
- model="gpt-4o",
327
+ model="gpt-5.4",
281
328
  mcp_servers=[my_custom_server],
282
329
  )
283
330
 
284
331
  # 2. Load the benchmark task config (adds salesforce, gmail, etc.)
285
332
  agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
286
- runtime_cfg = RuntimeConfig(model="gpt-4o", temperature=0.1, max_turns=200,
333
+ runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
287
334
  output_dir="./results")
288
335
 
289
336
  # 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
@@ -8,7 +8,7 @@ injection.
8
8
 
9
9
  Usage:
10
10
  python agent/hermes/example.py --config path/to/config.yaml
11
- python agent/hermes/example.py --config path/to/config.yaml --model openai/gpt-4o
11
+ python agent/hermes/example.py --config path/to/config.yaml --model openai/gpt-5.4
12
12
  python agent/hermes/example.py --config path/to/config.yaml --debug
13
13
 
14
14
  Prerequisites:
@@ -25,7 +25,7 @@ Request JSON schema::
25
25
  "base_url": "https://api.openai.com/v1", # may be ""
26
26
  "api_key": "sk-...", # may be null
27
27
  "provider": "openai", # may be null
28
- "model": "gpt-4o",
28
+ "model": "gpt-5.4",
29
29
  "max_turns": 30,
30
30
  "system_prompt": "You are ...", # may be null
31
31
  "enabled_toolsets": ["mcp-salesforce"], # MCP-only restriction
@@ -46,8 +46,8 @@ Examples:
46
46
  parser.add_argument(
47
47
  "--model",
48
48
  type=str,
49
- default="gpt-4o",
50
- help="Model to use (default: gpt-4o)"
49
+ default="gpt-5.4",
50
+ help="Model to use (default: gpt-5.4)"
51
51
  )
52
52
  parser.add_argument(
53
53
  "--temperature",
@@ -49,8 +49,8 @@ Examples:
49
49
  parser.add_argument(
50
50
  "--model",
51
51
  type=str,
52
- default="gpt-4o",
53
- help="Model to use (default: gpt-4o)"
52
+ default="gpt-5.4",
53
+ help="Model to use (default: gpt-5.4)"
54
54
  )
55
55
  parser.add_argument(
56
56
  "--temperature",
@@ -48,8 +48,8 @@ Examples:
48
48
  parser.add_argument(
49
49
  "--model",
50
50
  type=str,
51
- default="gpt-4o",
52
- help="Model to use (default: gpt-4o)"
51
+ default="gpt-5.4",
52
+ help="Model to use (default: gpt-5.4)"
53
53
  )
54
54
  parser.add_argument(
55
55
  "--temperature",
@@ -39,7 +39,7 @@ async def main():
39
39
  parser.add_argument(
40
40
  "--model",
41
41
  type=str,
42
- default="gpt-4o",
42
+ default="gpt-5.4",
43
43
  help="Model to use"
44
44
  )
45
45
 
@@ -184,7 +184,7 @@ async def main() -> None:
184
184
  agent = build_agent(
185
185
  native_agent=native,
186
186
  agent_cfg=AgentConfig(system_prompt=""),
187
- runtime_cfg=RuntimeConfig(model="gpt-4o", max_turns=10),
187
+ runtime_cfg=RuntimeConfig(model="gpt-5.4", max_turns=10),
188
188
  )
189
189
  async with agent:
190
190
  result = await agent.run("Say hello.", metadata={{"task_id": "smoke"}})
@@ -207,7 +207,7 @@ from .agent import {class_name}
207
207
  async def main() -> None:
208
208
  agent = {class_name}(
209
209
  agent_config=AgentConfig(system_prompt=""),
210
- runtime_config=RuntimeConfig(model="gpt-4o", max_turns=10),
210
+ runtime_config=RuntimeConfig(model="gpt-5.4", max_turns=10),
211
211
  )
212
212
  async with agent:
213
213
  result = await agent.run("Say hello.", metadata={{"task_id": "smoke"}})
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: decodingtrust-agent-sdk
3
- Version: 0.2.9
3
+ Version: 0.2.10
4
4
  Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
5
5
  Author-email: DTap Team <zhaorun@uchicago.edu>
6
6
  License: Apache License
@@ -245,6 +245,8 @@ Requires-Dist: rich>=13.0.0
245
245
  Requires-Dist: pandas>=2.0.0
246
246
  Requires-Dist: psutil>=5.9.0
247
247
  Requires-Dist: huggingface_hub>=0.20.0
248
+ Requires-Dist: playwright>=1.53
249
+ Requires-Dist: pillow>=10.0.0
248
250
  Provides-Extra: openai
249
251
  Requires-Dist: openai>=2.6.1; extra == "openai"
250
252
  Requires-Dist: openai-agents>=0.8.4; extra == "openai"
@@ -260,6 +262,10 @@ Requires-Dist: google-generativeai>=0.3.0; extra == "google"
260
262
  Requires-Dist: google-genai>=1.0.0; extra == "google"
261
263
  Requires-Dist: google-api-core>=2.28.0; extra == "google"
262
264
  Requires-Dist: google-api-python-client>=2.100.0; extra == "google"
265
+ Requires-Dist: google-cloud-monitoring>=2.20.0; extra == "google"
266
+ Requires-Dist: google-cloud-trace>=1.13.0; extra == "google"
267
+ Requires-Dist: opentelemetry-exporter-gcp-trace>=1.7.0; extra == "google"
268
+ Requires-Dist: opentelemetry-exporter-gcp-monitoring>=1.7.0a0; extra == "google"
263
269
  Provides-Extra: strands
264
270
  Requires-Dist: strands-agents>=1.40.0; extra == "strands"
265
271
  Provides-Extra: langchain
@@ -269,6 +275,7 @@ Requires-Dist: langchain-openai>=0.2.0; extra == "langchain"
269
275
  Requires-Dist: langchain-anthropic>=0.2.0; extra == "langchain"
270
276
  Provides-Extra: pocketflow
271
277
  Requires-Dist: pocketflow==0.0.3; extra == "pocketflow"
278
+ Provides-Extra: browser
272
279
  Provides-Extra: all
273
280
  Requires-Dist: decodingtrust-agent-sdk[claude,google,langchain,openai,pocketflow,strands]; extra == "all"
274
281
  Provides-Extra: dev
@@ -340,14 +347,57 @@ We have publicly released the full evaluation results, including the complete re
340
347
 
341
348
  ## Installation
342
349
 
350
+ ### Option A — from PyPI (recommended for users)
351
+
352
+ ```bash
353
+ pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
354
+ # …plus the backend(s) you actually use (see "Agent backends" below):
355
+ pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
356
+ pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
357
+ ```
358
+
359
+ This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
360
+ benchmark tasks with `--domain`:
361
+
362
+ ```bash
363
+ dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
364
+ ```
365
+
366
+ On first run, the per-task dataset is auto-downloaded from Hugging Face — **only for the
367
+ domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
368
+
369
+ ```bash
370
+ export HF_TOKEN=hf_...
371
+ ```
372
+
373
+ ### Option B — from source (for development)
374
+
343
375
  ```bash
344
376
  git clone https://github.com/AI-secure/DecodingTrust-Agent.git
345
377
  cd DecodingTrust-Agent
346
- pip install -r requirements.txt
347
- pip install -e .
378
+ pip install -e ".[openai]" # or [all] for every backend
379
+ # (here `python eval/evaluation.py --task-list benchmark/...` also works)
348
380
  ```
349
381
 
350
- Set the API key for your backbone model (only the providers you actually use are required):
382
+ ### Agent backends (optional extras)
383
+
384
+ Install only the framework you evaluate with:
385
+
386
+ | Extra | Backend (`--agent-type`) |
387
+ |---|---|
388
+ | `openai` | `openaisdk` |
389
+ | `claude` | `claudesdk` |
390
+ | `google` | `googleadk` |
391
+ | `langchain` | `langchain` |
392
+ | `strands` | `strands` |
393
+ | `pocketflow` | `pocketflow` |
394
+ | `all` | every backend above |
395
+
396
+ (The `browser` domain needs no extra — its Playwright deps are part of the core install, and the declared `browser` extra adds no further packages.)
397
+
398
+ ### Model keys & Docker
399
+
400
+ Set the API key for your backbone model (only the providers you use are required):
351
401
 
352
402
  ```bash
353
403
  export OPENAI_API_KEY=sk-...
@@ -357,6 +407,10 @@ export GOOGLE_API_KEY=...
357
407
 
358
408
  Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
359
409
 
410
+ > **Browser domain note:** browser tasks send full-page screenshots (large image-token
411
+ > input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
412
+ > provider token-rate limits (HTTP 429), then raise it if your quota allows.
413
+
360
414
  ---
361
415
 
362
416
  ## Quick Start
@@ -367,7 +421,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
367
421
  python eval/evaluation.py \
368
422
  --task-list benchmark/crm/benign.jsonl \
369
423
  --agent-type openaisdk \
370
- --model gpt-4o \
424
+ --model gpt-5.4 \
371
425
  --max-parallel 4
372
426
  ```
373
427
 
@@ -405,7 +459,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
405
459
  python eval/evaluation.py \
406
460
  --task-list benchmark/finance \
407
461
  --agent-type openaisdk \
408
- --model gpt-4o \
462
+ --model gpt-5.4 \
409
463
  --max-parallel 8
410
464
  ```
411
465
 
@@ -415,7 +469,7 @@ python eval/evaluation.py \
415
469
 
416
470
  ```bash
417
471
  # Benign utility only
418
- python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-4o
472
+ python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
419
473
 
420
474
  # Direct prompt injection only
421
475
  python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
@@ -432,7 +486,7 @@ python eval/evaluation.py \
432
486
  --task-type malicious \
433
487
  --threat-model indirect \
434
488
  --risk-category data-exfiltration \
435
- --agent-type openaisdk --model gpt-4o
489
+ --agent-type openaisdk --model gpt-5.4
436
490
  ```
437
491
 
438
492
  ### The entire benchmark
@@ -443,7 +497,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
443
497
  python eval/evaluation.py \
444
498
  --task-list benchmark \
445
499
  --agent-type openaisdk \
446
- --model gpt-4o \
500
+ --model gpt-5.4 \
447
501
  --max-parallel 16 \
448
502
  --skip-existing
449
503
  ```
@@ -478,7 +532,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
478
532
  Run it like any built-in task list:
479
533
 
480
534
  ```bash
481
- python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-4o --max-parallel 4
535
+ python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
482
536
  ```
483
537
 
484
538
  A few practical patterns:
@@ -486,11 +540,11 @@ A few practical patterns:
486
540
  ```bash
487
541
  # Curate from an existing file
488
542
  grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
489
- python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-4o
543
+ python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
490
544
 
491
545
  # Try just one task end-to-end
492
546
  echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
493
- python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-4o
547
+ python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
494
548
  ```
495
549
 
496
550
  For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
@@ -508,7 +562,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
508
562
  | `--risk-category` | `None` | e.g. `data-exfiltration` |
509
563
  | `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
510
564
  | `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `strands`, `pocketflow`, `openclaw` |
511
- | `--model` | `gpt-4o` | Backbone model identifier |
565
+ | `--model` | `gpt-5.4` | Backbone model identifier |
512
566
  | `--temperature` | `None` | Sampling temperature (model default if unset) |
513
567
  | `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
514
568
  | `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
@@ -558,13 +612,13 @@ async def main():
558
612
  native = OpenAIAgent(
559
613
  name="MyAgent",
560
614
  instructions="You are a helpful CRM assistant.",
561
- model="gpt-4o",
615
+ model="gpt-5.4",
562
616
  mcp_servers=[my_custom_server],
563
617
  )
564
618
 
565
619
  # 2. Load the benchmark task config (adds salesforce, gmail, etc.)
566
620
  agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
567
- runtime_cfg = RuntimeConfig(model="gpt-4o", temperature=0.1, max_turns=200,
621
+ runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
568
622
  output_dir="./results")
569
623
 
570
624
  # 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
@@ -21,10 +21,14 @@ rich>=13.0.0
21
21
  pandas>=2.0.0
22
22
  psutil>=5.9.0
23
23
  huggingface_hub>=0.20.0
24
+ playwright>=1.53
25
+ pillow>=10.0.0
24
26
 
25
27
  [all]
26
28
  decodingtrust-agent-sdk[claude,google,langchain,openai,pocketflow,strands]
27
29
 
30
+ [browser]
31
+
28
32
  [claude]
29
33
  anthropic>=0.18.0
30
34
  claude-agent-sdk>=0.1.18
@@ -44,6 +48,10 @@ google-generativeai>=0.3.0
44
48
  google-genai>=1.0.0
45
49
  google-api-core>=2.28.0
46
50
  google-api-python-client>=2.100.0
51
+ google-cloud-monitoring>=2.20.0
52
+ google-cloud-trace>=1.13.0
53
+ opentelemetry-exporter-gcp-trace>=1.7.0
54
+ opentelemetry-exporter-gcp-monitoring>=1.7.0a0
47
55
 
48
56
  [langchain]
49
57
  langchain>=0.3.0