decodingtrust-agent-sdk 0.2.8__tar.gz → 0.2.10__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (401)
  1. {decodingtrust_agent_sdk-0.2.8/decodingtrust_agent_sdk.egg-info → decodingtrust_agent_sdk-0.2.10}/PKG-INFO +79 -15
  2. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/README.md +71 -14
  3. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/example.py +1 -1
  4. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/hermes_runner.py +1 -1
  5. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/example.py +2 -2
  6. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/example.py +2 -2
  7. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/example.py +2 -2
  8. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/example.py +1 -1
  9. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/cli/scaffold.py +2 -2
  10. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10/decodingtrust_agent_sdk.egg-info}/PKG-INFO +79 -15
  11. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/requires.txt +8 -0
  12. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/mcp.yaml +7 -1
  13. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/browser/main.py +16 -3
  14. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/agent.py +2 -2
  15. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/evaluation.py +31 -11
  16. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/task_runner.py +2 -2
  17. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/pyproject.toml +19 -1
  18. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/__init__.py +6 -0
  19. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/agent_helpers.py +47 -63
  20. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/config.py +57 -7
  21. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/judge_helpers.py +28 -11
  22. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/LICENSE +0 -0
  23. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/MANIFEST.in +0 -0
  24. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/__init__.py +0 -0
  25. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/__init__.py +0 -0
  26. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/example.py +0 -0
  27. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/__init__.py +0 -0
  28. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/agent.py +0 -0
  29. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/mcp_proxy.py +0 -0
  30. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/utils.py +0 -0
  31. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/__init__.py +0 -0
  32. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/example.py +0 -0
  33. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/__init__.py +0 -0
  34. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/agent.py +0 -0
  35. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/mcp_wrapper.py +0 -0
  36. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/utils.py +0 -0
  37. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/__init__.py +0 -0
  38. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/__init__.py +0 -0
  39. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/agent.py +0 -0
  40. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/mcp_proxy.py +0 -0
  41. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/utils.py +0 -0
  42. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/__init__.py +0 -0
  43. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/__init__.py +0 -0
  44. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/agent.py +0 -0
  45. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/utils.py +0 -0
  46. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/__init__.py +0 -0
  47. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/__init__.py +0 -0
  48. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/agent.py +0 -0
  49. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/agent_wrapper.py +0 -0
  50. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/mcp_wrapper.py +0 -0
  51. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/utils.py +0 -0
  52. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/__init__.py +0 -0
  53. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/example.py +0 -0
  54. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/__init__.py +0 -0
  55. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/agent.py +0 -0
  56. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/helpers/__init__.py +0 -0
  57. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/helpers/auth_helpers.py +0 -0
  58. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/mcp_proxy.py +0 -0
  59. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/plugin_generator.py +0 -0
  60. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/utils.py +0 -0
  61. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/__init__.py +0 -0
  62. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/prompts/react_agent.py +0 -0
  63. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/__init__.py +0 -0
  64. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/agent.py +0 -0
  65. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/agent_wrapper.py +0 -0
  66. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/async_helper.py +0 -0
  67. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/mcp_react_agent.py +0 -0
  68. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/native_agent.py +0 -0
  69. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/nodes.py +0 -0
  70. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/__init__.py +0 -0
  71. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/__init__.py +0 -0
  72. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/agent.py +0 -0
  73. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/utils.py +0 -0
  74. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/__init__.py +0 -0
  75. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/benign.jsonl +0 -0
  76. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/direct.jsonl +0 -0
  77. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/indirect.jsonl +0 -0
  78. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/benign.jsonl +0 -0
  79. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/direct.jsonl +0 -0
  80. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/indirect.jsonl +0 -0
  81. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/benign.jsonl +0 -0
  82. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/direct.jsonl +0 -0
  83. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/indirect.jsonl +0 -0
  84. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/benign.jsonl +0 -0
  85. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/direct.jsonl +0 -0
  86. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/indirect.jsonl +0 -0
  87. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/benign.jsonl +0 -0
  88. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/direct.jsonl +0 -0
  89. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/indirect.jsonl +0 -0
  90. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/benign.jsonl +0 -0
  91. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/direct.jsonl +0 -0
  92. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/indirect.jsonl +0 -0
  93. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/benign.jsonl +0 -0
  94. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/direct.jsonl +0 -0
  95. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/indirect.jsonl +0 -0
  96. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/benign.jsonl +0 -0
  97. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/direct.jsonl +0 -0
  98. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/indirect.jsonl +0 -0
  99. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/benign.jsonl +0 -0
  100. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/direct.jsonl +0 -0
  101. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/indirect.jsonl +0 -0
  102. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/benign.jsonl +0 -0
  103. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/direct.jsonl +0 -0
  104. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/indirect.jsonl +0 -0
  105. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/benign.jsonl +0 -0
  106. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/direct.jsonl +0 -0
  107. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/indirect.jsonl +0 -0
  108. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/benign.jsonl +0 -0
  109. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/direct.jsonl +0 -0
  110. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/indirect.jsonl +0 -0
  111. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/benign.jsonl +0 -0
  112. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/direct.jsonl +0 -0
  113. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/indirect.jsonl +0 -0
  114. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/benign.jsonl +0 -0
  115. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/direct.jsonl +0 -0
  116. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/indirect.jsonl +0 -0
  117. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/cli/__init__.py +0 -0
  118. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/cli/main.py +0 -0
  119. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/SOURCES.txt +0 -0
  120. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/dependency_links.txt +0 -0
  121. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/entry_points.txt +0 -0
  122. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/top_level.txt +0 -0
  123. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/a2a_agents/generic/__init__.py +0 -0
  124. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/a2a_agents/generic/agent.py +0 -0
  125. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/env.yaml +0 -0
  126. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/injection_mcp.yaml +0 -0
  127. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/arxiv/docker-compose-hub.yml +0 -0
  128. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/arxiv/docker-compose.yml +0 -0
  129. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/atlassian/docker-compose-hub.yml +0 -0
  130. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/atlassian/docker-compose.yml +0 -0
  131. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/bigquery/docker-compose.yml +0 -0
  132. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/booking/docker-compose.yml +0 -0
  133. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/calendar/docker-compose-hub.yml +0 -0
  134. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/calendar/docker-compose.yml +0 -0
  135. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/custom-website/docker-compose.yml +0 -0
  136. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/customer_service/docker-compose.yml +0 -0
  137. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/databricks/docker-compose-hub.yml +0 -0
  138. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/databricks/docker-compose.yml +0 -0
  139. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/ecommerce/docker-compose.yml +0 -0
  140. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/ers/docker-compose.yml +0 -0
  141. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/finance/docker-compose.yml +0 -0
  142. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/gmail/docker-compose-hub.yml +0 -0
  143. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/gmail/docker-compose.yml +0 -0
  144. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/google-form/docker-compose-hub.yml +0 -0
  145. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/google-form/docker-compose.yml +0 -0
  146. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/googledocs/docker-compose-hub.yml +0 -0
  147. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/googledocs/docker-compose.yml +0 -0
  148. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/hospital/docker-compose-hub.yml +0 -0
  149. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/hospital/docker-compose.yml +0 -0
  150. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/legal/docker-compose.yml +0 -0
  151. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/linkedin/docker-compose.yml +0 -0
  152. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/macos/docker-compose.yml +0 -0
  153. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/os-filesystem/docker-compose-hub.yml +0 -0
  154. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/os-filesystem/docker-compose.yml +0 -0
  155. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/paypal/docker-compose-hub.yml +0 -0
  156. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/paypal/docker-compose.yml +0 -0
  157. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/research/docker-compose-hub.yml +0 -0
  158. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/research/docker-compose.yml +0 -0
  159. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +0 -0
  160. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/salesforce_crm/docker-compose.yaml +0 -0
  161. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/slack/docker-compose-hub.yml +0 -0
  162. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/slack/docker-compose.yml +0 -0
  163. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/snowflake/docker-compose-hub.yml +0 -0
  164. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/snowflake/docker-compose.yml +0 -0
  165. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telecom/docker-compose-hub.yml +0 -0
  166. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telecom/docker-compose.yml +0 -0
  167. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telegram/docker-compose-hub.yml +0 -0
  168. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telegram/docker-compose.yml +0 -0
  169. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/terminal/docker-compose-hub.yml +0 -0
  170. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/terminal/docker-compose.yml +0 -0
  171. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/travel/docker-compose-hub.yml +0 -0
  172. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/travel/docker-compose.yml +0 -0
  173. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/whatsapp/docker-compose-hub.yml +0 -0
  174. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/whatsapp/docker-compose.yml +0 -0
  175. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/dns_listener.py +0 -0
  176. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/docker-compose.yml +0 -0
  177. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/exfil_listener.py +0 -0
  178. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/ftp_listener.py +0 -0
  179. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/reset_server.py +0 -0
  180. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/validate.py +0 -0
  181. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/zoom/docker-compose-hub.yml +0 -0
  182. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/zoom/docker-compose.yml +0 -0
  183. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/atlassian/env_injection.py +0 -0
  184. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/calendar/env_injection.py +0 -0
  185. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/custom_website/env_injection.py +0 -0
  186. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/customer_service/env_injection.py +0 -0
  187. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/databricks/env_injection.py +0 -0
  188. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/ecommerce/env_injection.py +0 -0
  189. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/finance/env_injection.py +0 -0
  190. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/github/env_injection.py +0 -0
  191. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/gmail/env_injection.py +0 -0
  192. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/google_form/env_injection.py +0 -0
  193. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/googledocs/env_injection.py +0 -0
  194. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/hospital/env_injection.py +0 -0
  195. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/legal/env_injection.py +0 -0
  196. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/macos/env_injection.py +0 -0
  197. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/os-filesystem/env_injection.py +0 -0
  198. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/paypal/env_injection.py +0 -0
  199. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/research/env_injection.py +0 -0
  200. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/salesforce/env_injection.py +0 -0
  201. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/slack/env_injection.py +0 -0
  202. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/snowflake/env_injection.py +0 -0
  203. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/telecom/env_injection.py +0 -0
  204. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/telegram/env_injection.py +0 -0
  205. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/terminal/env_injection.py +0 -0
  206. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/travel/env_injection.py +0 -0
  207. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/whatsapp/env_injection.py +0 -0
  208. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/windows/env_injection.py +0 -0
  209. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/zoom/env_injection.py +0 -0
  210. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/atlassian/main.py +0 -0
  211. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/atlassian/test_server.py +0 -0
  212. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/bigquery/main.py +0 -0
  213. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/booking/main.py +0 -0
  214. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/example_multi_user.py +0 -0
  215. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/main.py +0 -0
  216. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/test_mcp.py +0 -0
  217. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/customer_service/main.py +0 -0
  218. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/databricks/main.py +0 -0
  219. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/databricks/probe.py +0 -0
  220. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/ers/main.py +0 -0
  221. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/__init__.py +0 -0
  222. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/__init__.py +0 -0
  223. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/data_loader.py +0 -0
  224. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/portfolio.py +0 -0
  225. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/__init__.py +0 -0
  226. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/evaluator.py +0 -0
  227. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/logger.py +0 -0
  228. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/__init__.py +0 -0
  229. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/config.py +0 -0
  230. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/content.py +0 -0
  231. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/html.py +0 -0
  232. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/locations.py +0 -0
  233. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/methods.py +0 -0
  234. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/presets.py +0 -0
  235. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/main.py +0 -0
  236. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/run_mcp.py +0 -0
  237. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/run_web.py +0 -0
  238. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/__init__.py +0 -0
  239. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor.py +0 -0
  240. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor_minimal.py +0 -0
  241. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor_simple.py +0 -0
  242. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/injection_mcp.py +0 -0
  243. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/mcp.py +0 -0
  244. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/__init__.py +0 -0
  245. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/account.py +0 -0
  246. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/browsing.py +0 -0
  247. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/social.py +0 -0
  248. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/trading.py +0 -0
  249. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/utility.py +0 -0
  250. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/web.py +0 -0
  251. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +0 -0
  252. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +0 -0
  253. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +0 -0
  254. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +0 -0
  255. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +0 -0
  256. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +0 -0
  257. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +0 -0
  258. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/test_mcp_tools.py +0 -0
  259. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/github/main.py +0 -0
  260. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/gmail/main.py +0 -0
  261. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/google_form/main.py +0 -0
  262. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/googledocs/main.py +0 -0
  263. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/hospital/mcp_server.py +0 -0
  264. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/__init__.py +0 -0
  265. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/__init__.py +0 -0
  266. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/courtlistener_store.py +0 -0
  267. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/data_loader.py +0 -0
  268. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/document_store.py +0 -0
  269. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/matter_manager.py +0 -0
  270. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/main.py +0 -0
  271. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/scripts/collect_data.py +0 -0
  272. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/server/__init__.py +0 -0
  273. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/server/mcp.py +0 -0
  274. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/macos/client_test.py +0 -0
  275. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/macos/mcp_server.py +0 -0
  276. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/os-filesystem/main.py +0 -0
  277. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/paypal/main.py +0 -0
  278. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/research/main.py +0 -0
  279. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/salesforce/main.py +0 -0
  280. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/slack/main.py +0 -0
  281. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/snowflake/main.py +0 -0
  282. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/snowflake/probe.py +0 -0
  283. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telecom/mcp_client.py +0 -0
  284. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telecom/mcp_server.py +0 -0
  285. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telegram/main.py +0 -0
  286. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/terminal/main.py +0 -0
  287. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/travel/client_test.py +0 -0
  288. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/travel/mcp_server.py +0 -0
  289. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/whatsapp/main.py +0 -0
  290. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/windows/client_test.py +0 -0
  291. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/windows/mcp_server.py +0 -0
  292. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/zoom/main.py +0 -0
  293. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/__init__.py +0 -0
  294. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/__init__.py +0 -0
  295. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/audit_log.py +0 -0
  296. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/hooks.json +0 -0
  297. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/run_benign.py +0 -0
  298. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/__init__.py +0 -0
  299. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/attacks.py +0 -0
  300. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/environment.py +0 -0
  301. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/hooks.py +0 -0
  302. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/judge.py +0 -0
  303. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/red_teaming_trajectory.py +0 -0
  304. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/task.py +0 -0
  305. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/trajectory.py +0 -0
  306. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/__init__.py +0 -0
  307. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/atlassian/__init__.py +0 -0
  308. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/atlassian/helpers.py +0 -0
  309. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/bigquery/__init__.py +0 -0
  310. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/bigquery/helpers.py +0 -0
  311. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/calendar/__init__.py +0 -0
  312. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/calendar/helpers.py +0 -0
  313. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/chase/__init__.py +0 -0
  314. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/chase/helpers.py +0 -0
  315. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/__init__.py +0 -0
  316. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/cs_env_client.py +0 -0
  317. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/helpers.py +0 -0
  318. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/__init__.py +0 -0
  319. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/abusive_comms.py +0 -0
  320. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/bulk_destructive.py +0 -0
  321. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/cross_customer.py +0 -0
  322. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/email_exfiltration.py +0 -0
  323. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/financial_fraud.py +0 -0
  324. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/llm_utils.py +0 -0
  325. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/pii_disclosure.py +0 -0
  326. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/policy_misrep.py +0 -0
  327. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/text_utils.py +0 -0
  328. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/databricks/__init__.py +0 -0
  329. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/databricks/helpers.py +0 -0
  330. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/finance/__init__.py +0 -0
  331. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/finance/helpers.py +0 -0
  332. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/github/__init__.py +0 -0
  333. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/github/helpers.py +0 -0
  334. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/gmail/__init__.py +0 -0
  335. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/gmail/helpers.py +0 -0
  336. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/google_form/__init__.py +0 -0
  337. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/google_form/helpers.py +0 -0
  338. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/legal/__init__.py +0 -0
  339. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/legal/helpers.py +0 -0
  340. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/__init__.py +0 -0
  341. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/env_setup.py +0 -0
  342. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/helpers.py +0 -0
  343. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/os_filesystem/__init__.py +0 -0
  344. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/os_filesystem/helpers.py +0 -0
  345. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/paypal/__init__.py +0 -0
  346. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/paypal/helpers.py +0 -0
  347. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/port_allocator.py +0 -0
  348. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/research/__init__.py +0 -0
  349. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/research/helpers.py +0 -0
  350. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/robinhood/__init__.py +0 -0
  351. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/robinhood/helpers.py +0 -0
  352. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/salesforce/__init__.py +0 -0
  353. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/salesforce/helpers.py +0 -0
  354. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/slack/__init__.py +0 -0
  355. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/slack/helpers.py +0 -0
  356. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/snowflake/__init__.py +0 -0
  357. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/snowflake/helpers.py +0 -0
  358. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telecom/__init__.py +0 -0
  359. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telecom/helpers.py +0 -0
  360. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telegram/__init__.py +0 -0
  361. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telegram/helpers.py +0 -0
  362. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/terminal/__init__.py +0 -0
  363. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/terminal/helpers.py +0 -0
  364. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/__init__.py +0 -0
  365. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/env_client.py +0 -0
  366. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/llm_judge.py +0 -0
  367. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/prompts.py +0 -0
  368. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/utils/__init__.py +0 -0
  369. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/whatsapp/__init__.py +0 -0
  370. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/whatsapp/helpers.py +0 -0
  371. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/__init__.py +0 -0
  372. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/env_reset.py +0 -0
  373. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/env_setup.py +0 -0
  374. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/exfil_helpers.py +0 -0
  375. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/helpers.py +0 -0
  376. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/zoom/__init__.py +0 -0
  377. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/zoom/helpers.py +0 -0
  378. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/__init__.py +0 -0
  379. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/_ui.py +0 -0
  380. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/setup.cfg +0 -0
  381. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/a2a_helpers.py +0 -0
  382. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/agent_wrapper.py +0 -0
  383. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/compose_utils.py +0 -0
  384. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/env_helpers.py +0 -0
  385. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/eval_stats.py +0 -0
  386. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/injection_a2a_helpers.py +0 -0
  387. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/injection_helpers.py +0 -0
  388. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/injection_mcp_helpers.py +0 -0
  389. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/judge_utils.py +0 -0
  390. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/llm.py +0 -0
  391. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/logging.py +0 -0
  392. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/mcp_helpers.py +0 -0
  393. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/mcp_manager.py +0 -0
  394. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/memory_guard.py +0 -0
  395. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/red_teaming_sandbox.py +0 -0
  396. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/reset_helpers.py +0 -0
  397. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/resource_manager.py +0 -0
  398. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/skill_helpers.py +0 -0
  399. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/task_executor.py +0 -0
  400. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/task_helpers.py +0 -0
  401. {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/template_helpers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: decodingtrust-agent-sdk
3
- Version: 0.2.8
3
+ Version: 0.2.10
4
4
  Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
5
5
  Author-email: DTap Team <zhaorun@uchicago.edu>
6
6
  License: Apache License
@@ -245,6 +245,8 @@ Requires-Dist: rich>=13.0.0
245
245
  Requires-Dist: pandas>=2.0.0
246
246
  Requires-Dist: psutil>=5.9.0
247
247
  Requires-Dist: huggingface_hub>=0.20.0
248
+ Requires-Dist: playwright>=1.53
249
+ Requires-Dist: pillow>=10.0.0
248
250
  Provides-Extra: openai
249
251
  Requires-Dist: openai>=2.6.1; extra == "openai"
250
252
  Requires-Dist: openai-agents>=0.8.4; extra == "openai"
@@ -260,6 +262,10 @@ Requires-Dist: google-generativeai>=0.3.0; extra == "google"
260
262
  Requires-Dist: google-genai>=1.0.0; extra == "google"
261
263
  Requires-Dist: google-api-core>=2.28.0; extra == "google"
262
264
  Requires-Dist: google-api-python-client>=2.100.0; extra == "google"
265
+ Requires-Dist: google-cloud-monitoring>=2.20.0; extra == "google"
266
+ Requires-Dist: google-cloud-trace>=1.13.0; extra == "google"
267
+ Requires-Dist: opentelemetry-exporter-gcp-trace>=1.7.0; extra == "google"
268
+ Requires-Dist: opentelemetry-exporter-gcp-monitoring>=1.7.0a0; extra == "google"
263
269
  Provides-Extra: strands
264
270
  Requires-Dist: strands-agents>=1.40.0; extra == "strands"
265
271
  Provides-Extra: langchain
@@ -269,6 +275,7 @@ Requires-Dist: langchain-openai>=0.2.0; extra == "langchain"
269
275
  Requires-Dist: langchain-anthropic>=0.2.0; extra == "langchain"
270
276
  Provides-Extra: pocketflow
271
277
  Requires-Dist: pocketflow==0.0.3; extra == "pocketflow"
278
+ Provides-Extra: browser
272
279
  Provides-Extra: all
273
280
  Requires-Dist: decodingtrust-agent-sdk[claude,google,langchain,openai,pocketflow,strands]; extra == "all"
274
281
  Provides-Extra: dev
@@ -311,8 +318,18 @@ Website: <https://decodingtrust-agent.com>
311
318
 
312
319
  ---
313
320
 
321
+ ## Evaluation Results & Trajectories
322
+
323
+ We have publicly released the full evaluation results, including the complete red-teaming trajectories for every evaluated agent, along with the verifiable judge output for each task that records each agent's pass/fail status:
324
+
325
+ - **Dataset and trajectories:** <https://huggingface.co/datasets/AI-Secure/DTap-Bench-Agent-Trajectories>
326
+ - **Benchmark website:** <https://decodingtrust-agent.com/benchmark>
327
+
328
+ ---
329
+
314
330
  ## Table of Contents
315
331
 
332
+ - [Evaluation Results & Trajectories](#evaluation-results--trajectories)
316
333
  - [Installation](#installation)
317
334
  - [Quick Start](#quick-start)
318
335
  - [Running the Default Benchmark](#running-the-default-benchmark)
@@ -330,14 +347,57 @@ Website: <https://decodingtrust-agent.com>
330
347
 
331
348
  ## Installation
332
349
 
350
+ ### Option A — from PyPI (recommended for users)
351
+
352
+ ```bash
353
+ pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
354
+ # …plus the backend(s) you actually use (see "Agent backends" below):
355
+ pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
356
+ pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
357
+ ```
358
+
359
+ This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
360
+ benchmark tasks with `--domain`:
361
+
362
+ ```bash
363
+ dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
364
+ ```
365
+
366
+ On first run, the per-task dataset is auto-downloaded from HuggingFace — **only for the
367
+ domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
368
+
369
+ ```bash
370
+ export HF_TOKEN=hf_...
371
+ ```
372
+
373
+ ### Option B — from source (for development)
374
+
333
375
  ```bash
334
376
  git clone https://github.com/AI-secure/DecodingTrust-Agent.git
335
377
  cd DecodingTrust-Agent
336
- pip install -r requirements.txt
337
- pip install -e .
378
+ pip install -e ".[openai]" # or [all] for every backend
379
+ # (here `python eval/evaluation.py --task-list benchmark/...` also works)
338
380
  ```
339
381
 
340
- Set the API key for your backbone model (only the providers you actually use are required):
382
+ ### Agent backends (optional extras)
383
+
384
+ Install only the framework you evaluate with:
385
+
386
+ | Extra | Backend (`--agent-type`) |
387
+ |---|---|
388
+ | `openai` | `openaisdk` |
389
+ | `claude` | `claudesdk` |
390
+ | `google` | `googleadk` |
391
+ | `langchain` | `langchain` |
392
+ | `strands` | `strands` |
393
+ | `pocketflow` | `pocketflow` |
394
+ | `all` | every backend above |
395
+
396
+ (The `browser` domain needs no extra — its Playwright deps are part of the core install.)
397
+
398
+ ### Model keys & Docker
399
+
400
+ Set the API key for your backbone model (only the providers you use):
341
401
 
342
402
  ```bash
343
403
  export OPENAI_API_KEY=sk-...
@@ -347,6 +407,10 @@ export GOOGLE_API_KEY=...
347
407
 
348
408
  Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
349
409
 
410
+ > **Browser domain note:** browser tasks send full-page screenshots (large image-token
411
+ > input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
412
+ > provider token-rate limits (HTTP 429), then raise it if your quota allows.
413
+
350
414
  ---
351
415
 
352
416
  ## Quick Start
@@ -357,7 +421,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
357
421
  python eval/evaluation.py \
358
422
  --task-list benchmark/crm/benign.jsonl \
359
423
  --agent-type openaisdk \
360
- --model gpt-4o \
424
+ --model gpt-5.4 \
361
425
  --max-parallel 4
362
426
  ```
363
427
 
@@ -395,7 +459,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
395
459
  python eval/evaluation.py \
396
460
  --task-list benchmark/finance \
397
461
  --agent-type openaisdk \
398
- --model gpt-4o \
462
+ --model gpt-5.4 \
399
463
  --max-parallel 8
400
464
  ```
401
465
 
@@ -405,7 +469,7 @@ python eval/evaluation.py \
405
469
 
406
470
  ```bash
407
471
  # Benign utility only
408
- python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-4o
472
+ python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
409
473
 
410
474
  # Direct prompt injection only
411
475
  python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
@@ -422,7 +486,7 @@ python eval/evaluation.py \
422
486
  --task-type malicious \
423
487
  --threat-model indirect \
424
488
  --risk-category data-exfiltration \
425
- --agent-type openaisdk --model gpt-4o
489
+ --agent-type openaisdk --model gpt-5.4
426
490
  ```
427
491
 
428
492
  ### The entire benchmark
@@ -433,7 +497,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
433
497
  python eval/evaluation.py \
434
498
  --task-list benchmark \
435
499
  --agent-type openaisdk \
436
- --model gpt-4o \
500
+ --model gpt-5.4 \
437
501
  --max-parallel 16 \
438
502
  --skip-existing
439
503
  ```
@@ -468,7 +532,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
468
532
  Run it like any built-in task list:
469
533
 
470
534
  ```bash
471
- python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-4o --max-parallel 4
535
+ python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
472
536
  ```
473
537
 
474
538
  A few practical patterns:
@@ -476,11 +540,11 @@ A few practical patterns:
476
540
  ```bash
477
541
  # Curate from an existing file
478
542
  grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
479
- python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-4o
543
+ python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
480
544
 
481
545
  # Try just one task end-to-end
482
546
  echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
483
- python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-4o
547
+ python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
484
548
  ```
485
549
 
486
550
  For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
@@ -498,7 +562,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
498
562
  | `--risk-category` | `None` | e.g. `data-exfiltration` |
499
563
  | `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
500
564
  | `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `strands`, `pocketflow`, `openclaw` |
501
- | `--model` | `gpt-4o` | Backbone model identifier |
565
+ | `--model` | `gpt-5.4` | Backbone model identifier |
502
566
  | `--temperature` | `None` | Sampling temperature (model default if unset) |
503
567
  | `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
504
568
  | `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
@@ -548,13 +612,13 @@ async def main():
548
612
  native = OpenAIAgent(
549
613
  name="MyAgent",
550
614
  instructions="You are a helpful CRM assistant.",
551
- model="gpt-4o",
615
+ model="gpt-5.4",
552
616
  mcp_servers=[my_custom_server],
553
617
  )
554
618
 
555
619
  # 2. Load the benchmark task config (adds salesforce, gmail, etc.)
556
620
  agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
557
- runtime_cfg = RuntimeConfig(model="gpt-4o", temperature=0.1, max_turns=200,
621
+ runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
558
622
  output_dir="./results")
559
623
 
560
624
  # 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
@@ -30,8 +30,18 @@ Website: <https://decodingtrust-agent.com>
30
30
 
31
31
  ---
32
32
 
33
+ ## Evaluation Results & Trajectories
34
+
35
+ We have publicly released the full evaluation results, including the complete red-teaming trajectories for every evaluated agent, along with the verifiable judge output for each task that records each agent's pass/fail status:
36
+
37
+ - **Dataset and trajectories:** <https://huggingface.co/datasets/AI-Secure/DTap-Bench-Agent-Trajectories>
38
+ - **Benchmark website:** <https://decodingtrust-agent.com/benchmark>
39
+
40
+ ---
41
+
33
42
  ## Table of Contents
34
43
 
44
+ - [Evaluation Results & Trajectories](#evaluation-results--trajectories)
35
45
  - [Installation](#installation)
36
46
  - [Quick Start](#quick-start)
37
47
  - [Running the Default Benchmark](#running-the-default-benchmark)
@@ -49,14 +59,57 @@ Website: <https://decodingtrust-agent.com>
49
59
 
50
60
  ## Installation
51
61
 
62
+ ### Option A — from PyPI (recommended for users)
63
+
64
+ ```bash
65
+ pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
66
+ # …plus the backend(s) you actually use (see "Agent backends" below):
67
+ pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
68
+ pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
69
+ ```
70
+
71
+ This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
72
+ benchmark tasks with `--domain`:
73
+
74
+ ```bash
75
+ dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
76
+ ```
77
+
78
+ On first run, the per-task dataset is auto-downloaded from HuggingFace — **only for the
79
+ domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
80
+
81
+ ```bash
82
+ export HF_TOKEN=hf_...
83
+ ```
84
+
85
+ ### Option B — from source (for development)
86
+
52
87
  ```bash
53
88
  git clone https://github.com/AI-secure/DecodingTrust-Agent.git
54
89
  cd DecodingTrust-Agent
55
- pip install -r requirements.txt
56
- pip install -e .
90
+ pip install -e ".[openai]" # or [all] for every backend
91
+ # (here `python eval/evaluation.py --task-list benchmark/...` also works)
57
92
  ```
58
93
 
59
- Set the API key for your backbone model (only the providers you actually use are required):
94
+ ### Agent backends (optional extras)
95
+
96
+ Install only the framework you evaluate with:
97
+
98
+ | Extra | Backend (`--agent-type`) |
99
+ |---|---|
100
+ | `openai` | `openaisdk` |
101
+ | `claude` | `claudesdk` |
102
+ | `google` | `googleadk` |
103
+ | `langchain` | `langchain` |
104
+ | `strands` | `strands` |
105
+ | `pocketflow` | `pocketflow` |
106
+ | `all` | every backend above |
107
+
108
+ (The `browser` domain needs no extra — its Playwright deps are part of the core install.)
109
+
110
+ ### Model keys & Docker
111
+
112
+ Set the API key for your backbone model (only the providers you use):
60
113
 
61
114
  ```bash
62
115
  export OPENAI_API_KEY=sk-...
@@ -66,6 +119,10 @@ export GOOGLE_API_KEY=...
66
119
 
67
120
  Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
68
121
 
122
+ > **Browser domain note:** browser tasks send full-page screenshots (large image-token
123
+ > input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
124
+ > provider token-rate limits (HTTP 429), then raise it if your quota allows.
125
+
69
126
  ---
70
127
 
71
128
  ## Quick Start
@@ -76,7 +133,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
76
133
  python eval/evaluation.py \
77
134
  --task-list benchmark/crm/benign.jsonl \
78
135
  --agent-type openaisdk \
79
- --model gpt-4o \
136
+ --model gpt-5.4 \
80
137
  --max-parallel 4
81
138
  ```
82
139
 
@@ -114,7 +171,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
114
171
  python eval/evaluation.py \
115
172
  --task-list benchmark/finance \
116
173
  --agent-type openaisdk \
117
- --model gpt-4o \
174
+ --model gpt-5.4 \
118
175
  --max-parallel 8
119
176
  ```
120
177
 
@@ -124,7 +181,7 @@ python eval/evaluation.py \
124
181
 
125
182
  ```bash
126
183
  # Benign utility only
127
- python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-4o
184
+ python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
128
185
 
129
186
  # Direct prompt injection only
130
187
  python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
@@ -141,7 +198,7 @@ python eval/evaluation.py \
141
198
  --task-type malicious \
142
199
  --threat-model indirect \
143
200
  --risk-category data-exfiltration \
144
- --agent-type openaisdk --model gpt-4o
201
+ --agent-type openaisdk --model gpt-5.4
145
202
  ```
146
203
 
147
204
  ### The entire benchmark
@@ -152,7 +209,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
152
209
  python eval/evaluation.py \
153
210
  --task-list benchmark \
154
211
  --agent-type openaisdk \
155
- --model gpt-4o \
212
+ --model gpt-5.4 \
156
213
  --max-parallel 16 \
157
214
  --skip-existing
158
215
  ```
@@ -187,7 +244,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
187
244
  Run it like any built-in task list:
188
245
 
189
246
  ```bash
190
- python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-4o --max-parallel 4
247
+ python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
191
248
  ```
192
249
 
193
250
  A few practical patterns:
@@ -195,11 +252,11 @@ A few practical patterns:
195
252
  ```bash
196
253
  # Curate from an existing file
197
254
  grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
198
- python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-4o
255
+ python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
199
256
 
200
257
  # Try just one task end-to-end
201
258
  echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
202
- python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-4o
259
+ python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
203
260
  ```
204
261
 
205
262
  For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
@@ -217,7 +274,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
217
274
  | `--risk-category` | `None` | e.g. `data-exfiltration` |
218
275
  | `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
219
276
  | `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `strands`, `pocketflow`, `openclaw` |
220
- | `--model` | `gpt-4o` | Backbone model identifier |
277
+ | `--model` | `gpt-5.4` | Backbone model identifier |
221
278
  | `--temperature` | `None` | Sampling temperature (model default if unset) |
222
279
  | `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
223
280
  | `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
@@ -267,13 +324,13 @@ async def main():
267
324
  native = OpenAIAgent(
268
325
  name="MyAgent",
269
326
  instructions="You are a helpful CRM assistant.",
270
- model="gpt-4o",
327
+ model="gpt-5.4",
271
328
  mcp_servers=[my_custom_server],
272
329
  )
273
330
 
274
331
  # 2. Load the benchmark task config (adds salesforce, gmail, etc.)
275
332
  agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
276
- runtime_cfg = RuntimeConfig(model="gpt-4o", temperature=0.1, max_turns=200,
333
+ runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
277
334
  output_dir="./results")
278
335
 
279
336
  # 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
@@ -8,7 +8,7 @@ injection.
8
8
 
9
9
  Usage:
10
10
  python agent/hermes/example.py --config path/to/config.yaml
11
- python agent/hermes/example.py --config path/to/config.yaml --model openai/gpt-4o
11
+ python agent/hermes/example.py --config path/to/config.yaml --model openai/gpt-5.4
12
12
  python agent/hermes/example.py --config path/to/config.yaml --debug
13
13
 
14
14
  Prerequisites:
@@ -25,7 +25,7 @@ Request JSON schema::
25
25
  "base_url": "https://api.openai.com/v1", # may be ""
26
26
  "api_key": "sk-...", # may be null
27
27
  "provider": "openai", # may be null
28
- "model": "gpt-4o",
28
+ "model": "gpt-5.4",
29
29
  "max_turns": 30,
30
30
  "system_prompt": "You are ...", # may be null
31
31
  "enabled_toolsets": ["mcp-salesforce"], # MCP-only restriction
@@ -46,8 +46,8 @@ Examples:
46
46
  parser.add_argument(
47
47
  "--model",
48
48
  type=str,
49
- default="gpt-4o",
50
- help="Model to use (default: gpt-4o)"
49
+ default="gpt-5.4",
50
+ help="Model to use (default: gpt-5.4)"
51
51
  )
52
52
  parser.add_argument(
53
53
  "--temperature",
@@ -49,8 +49,8 @@ Examples:
49
49
  parser.add_argument(
50
50
  "--model",
51
51
  type=str,
52
- default="gpt-4o",
53
- help="Model to use (default: gpt-4o)"
52
+ default="gpt-5.4",
53
+ help="Model to use (default: gpt-5.4)"
54
54
  )
55
55
  parser.add_argument(
56
56
  "--temperature",
@@ -48,8 +48,8 @@ Examples:
48
48
  parser.add_argument(
49
49
  "--model",
50
50
  type=str,
51
- default="gpt-4o",
52
- help="Model to use (default: gpt-4o)"
51
+ default="gpt-5.4",
52
+ help="Model to use (default: gpt-5.4)"
53
53
  )
54
54
  parser.add_argument(
55
55
  "--temperature",
@@ -39,7 +39,7 @@ async def main():
39
39
  parser.add_argument(
40
40
  "--model",
41
41
  type=str,
42
- default="gpt-4o",
42
+ default="gpt-5.4",
43
43
  help="Model to use"
44
44
  )
45
45
 
@@ -184,7 +184,7 @@ async def main() -> None:
184
184
  agent = build_agent(
185
185
  native_agent=native,
186
186
  agent_cfg=AgentConfig(system_prompt=""),
187
- runtime_cfg=RuntimeConfig(model="gpt-4o", max_turns=10),
187
+ runtime_cfg=RuntimeConfig(model="gpt-5.4", max_turns=10),
188
188
  )
189
189
  async with agent:
190
190
  result = await agent.run("Say hello.", metadata={{"task_id": "smoke"}})
@@ -207,7 +207,7 @@ from .agent import {class_name}
207
207
  async def main() -> None:
208
208
  agent = {class_name}(
209
209
  agent_config=AgentConfig(system_prompt=""),
210
- runtime_config=RuntimeConfig(model="gpt-4o", max_turns=10),
210
+ runtime_config=RuntimeConfig(model="gpt-5.4", max_turns=10),
211
211
  )
212
212
  async with agent:
213
213
  result = await agent.run("Say hello.", metadata={{"task_id": "smoke"}})