decodingtrust-agent-sdk 0.1.1__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (381)
  1. {decodingtrust_agent_sdk-0.1.1/decodingtrust_agent_sdk.egg-info → decodingtrust_agent_sdk-0.2.1}/PKG-INFO +1 -1
  2. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/cli/main.py +1 -1
  3. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1/decodingtrust_agent_sdk.egg-info}/PKG-INFO +1 -1
  4. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/SOURCES.txt +1 -0
  5. decodingtrust_agent_sdk-0.2.1/eval/_ui.py +339 -0
  6. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/eval/evaluation.py +121 -24
  7. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/eval/task_runner.py +16 -0
  8. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/pyproject.toml +1 -1
  9. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/LICENSE +0 -0
  10. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/MANIFEST.in +0 -0
  11. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/README.md +0 -0
  12. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/__init__.py +0 -0
  13. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/__init__.py +0 -0
  14. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/example.py +0 -0
  15. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/__init__.py +0 -0
  16. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/agent.py +0 -0
  17. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/mcp_proxy.py +0 -0
  18. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/utils.py +0 -0
  19. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/__init__.py +0 -0
  20. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/example.py +0 -0
  21. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/__init__.py +0 -0
  22. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/agent.py +0 -0
  23. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/mcp_wrapper.py +0 -0
  24. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/utils.py +0 -0
  25. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/__init__.py +0 -0
  26. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/example.py +0 -0
  27. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/src/__init__.py +0 -0
  28. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/src/agent.py +0 -0
  29. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/src/utils.py +0 -0
  30. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/__init__.py +0 -0
  31. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/example.py +0 -0
  32. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/__init__.py +0 -0
  33. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/agent.py +0 -0
  34. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/agent_wrapper.py +0 -0
  35. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/mcp_wrapper.py +0 -0
  36. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/utils.py +0 -0
  37. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/__init__.py +0 -0
  38. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/example.py +0 -0
  39. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/__init__.py +0 -0
  40. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/agent.py +0 -0
  41. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/helpers/__init__.py +0 -0
  42. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/helpers/auth_helpers.py +0 -0
  43. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/mcp_proxy.py +0 -0
  44. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/plugin_generator.py +0 -0
  45. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/utils.py +0 -0
  46. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/__init__.py +0 -0
  47. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/example.py +0 -0
  48. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/prompts/react_agent.py +0 -0
  49. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/__init__.py +0 -0
  50. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/agent.py +0 -0
  51. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/agent_wrapper.py +0 -0
  52. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/async_helper.py +0 -0
  53. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/mcp_react_agent.py +0 -0
  54. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/native_agent.py +0 -0
  55. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/nodes.py +0 -0
  56. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/__init__.py +0 -0
  57. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/browser/benign.jsonl +0 -0
  58. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/browser/direct.jsonl +0 -0
  59. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/browser/indirect.jsonl +0 -0
  60. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/code/benign.jsonl +0 -0
  61. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/code/direct.jsonl +0 -0
  62. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/code/indirect.jsonl +0 -0
  63. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/crm/benign.jsonl +0 -0
  64. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/crm/direct.jsonl +0 -0
  65. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/crm/indirect.jsonl +0 -0
  66. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/customer-service/benign.jsonl +0 -0
  67. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/customer-service/direct.jsonl +0 -0
  68. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/customer-service/indirect.jsonl +0 -0
  69. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/finance/benign.jsonl +0 -0
  70. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/finance/direct.jsonl +0 -0
  71. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/finance/indirect.jsonl +0 -0
  72. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/legal/benign.jsonl +0 -0
  73. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/legal/direct.jsonl +0 -0
  74. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/legal/indirect.jsonl +0 -0
  75. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/macos/benign.jsonl +0 -0
  76. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/macos/direct.jsonl +0 -0
  77. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/macos/indirect.jsonl +0 -0
  78. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/medical/benign.jsonl +0 -0
  79. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/medical/direct.jsonl +0 -0
  80. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/medical/indirect.jsonl +0 -0
  81. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/os-filesystem/benign.jsonl +0 -0
  82. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/os-filesystem/direct.jsonl +0 -0
  83. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/os-filesystem/indirect.jsonl +0 -0
  84. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/research/benign.jsonl +0 -0
  85. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/research/direct.jsonl +0 -0
  86. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/research/indirect.jsonl +0 -0
  87. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/telecom/benign.jsonl +0 -0
  88. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/telecom/direct.jsonl +0 -0
  89. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/telecom/indirect.jsonl +0 -0
  90. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/travel/benign.jsonl +0 -0
  91. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/travel/direct.jsonl +0 -0
  92. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/travel/indirect.jsonl +0 -0
  93. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/windows/benign.jsonl +0 -0
  94. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/windows/direct.jsonl +0 -0
  95. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/windows/indirect.jsonl +0 -0
  96. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/workflow/benign.jsonl +0 -0
  97. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/workflow/direct.jsonl +0 -0
  98. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/workflow/indirect.jsonl +0 -0
  99. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/cli/__init__.py +0 -0
  100. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/cli/scaffold.py +0 -0
  101. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/dependency_links.txt +0 -0
  102. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/entry_points.txt +0 -0
  103. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/requires.txt +0 -0
  104. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/top_level.txt +0 -0
  105. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/config/env.yaml +0 -0
  106. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/config/injection_mcp.yaml +0 -0
  107. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/config/mcp.yaml +0 -0
  108. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/arxiv/docker-compose-hub.yml +0 -0
  109. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/arxiv/docker-compose.yml +0 -0
  110. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker/docker-compose.dev.yml +0 -0
  111. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker/docker-compose.yml +0 -0
  112. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker-compose-hub.yml +0 -0
  113. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker-compose.yml +0 -0
  114. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/bigquery/docker-compose.yml +0 -0
  115. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/booking/docker-compose.yml +0 -0
  116. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/calendar/docker-compose-hub.yml +0 -0
  117. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/calendar/docker-compose.yml +0 -0
  118. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/custom-website/docker-compose.yml +0 -0
  119. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/customer_service/docker-compose.yml +0 -0
  120. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/databricks/docker-compose-hub.yml +0 -0
  121. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/databricks/docker-compose.yml +0 -0
  122. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/ecommerce/docker-compose.yml +0 -0
  123. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/ers/docker-compose.yml +0 -0
  124. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/ers/hrms/docker/docker-compose.yml +0 -0
  125. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/finance/docker-compose.yml +0 -0
  126. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/github/docker/docker-compose-hub.yml +0 -0
  127. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/github/docker/docker-compose.yml +0 -0
  128. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/gmail/docker-compose-hub.yml +0 -0
  129. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/gmail/docker-compose.yml +0 -0
  130. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/google-form/docker-compose-hub.yml +0 -0
  131. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/google-form/docker-compose.yml +0 -0
  132. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/googledocs/docker-compose-hub.yml +0 -0
  133. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/googledocs/docker-compose.yml +0 -0
  134. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/hospital/docker-compose-hub.yml +0 -0
  135. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/hospital/docker-compose.yml +0 -0
  136. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/legal/docker-compose.yml +0 -0
  137. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/linkedin/docker-compose.yml +0 -0
  138. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/macos/docker-compose.yml +0 -0
  139. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/os-filesystem/docker-compose-hub.yml +0 -0
  140. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/os-filesystem/docker-compose.yml +0 -0
  141. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/paypal/docker-compose-hub.yml +0 -0
  142. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/paypal/docker-compose.yml +0 -0
  143. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/research/docker-compose-hub.yml +0 -0
  144. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/research/docker-compose.yml +0 -0
  145. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +0 -0
  146. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/salesforce_crm/docker-compose.yaml +0 -0
  147. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/slack/docker-compose-hub.yml +0 -0
  148. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/slack/docker-compose.yml +0 -0
  149. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/snowflake/docker-compose-hub.yml +0 -0
  150. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/snowflake/docker-compose.yml +0 -0
  151. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telecom/docker-compose-hub.yml +0 -0
  152. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telecom/docker-compose.yml +0 -0
  153. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telegram/docker-compose-hub.yml +0 -0
  154. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telegram/docker-compose.yml +0 -0
  155. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/terminal/docker-compose-hub.yml +0 -0
  156. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/terminal/docker-compose.yml +0 -0
  157. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/travel/docker-compose-hub.yml +0 -0
  158. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/travel/docker-compose.yml +0 -0
  159. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/whatsapp/docker-compose-hub.yml +0 -0
  160. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/whatsapp/docker-compose.yml +0 -0
  161. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/windows/docker-compose.yml +0 -0
  162. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/zoom/docker-compose-hub.yml +0 -0
  163. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/zoom/docker-compose.yml +0 -0
  164. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/atlassian/env_injection.py +0 -0
  165. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/calendar/env_injection.py +0 -0
  166. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/custom_website/env_injection.py +0 -0
  167. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/customer_service/env_injection.py +0 -0
  168. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/databricks/env_injection.py +0 -0
  169. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/ecommerce/env_injection.py +0 -0
  170. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/finance/env_injection.py +0 -0
  171. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/github/env_injection.py +0 -0
  172. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/gmail/env_injection.py +0 -0
  173. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/google_form/env_injection.py +0 -0
  174. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/googledocs/env_injection.py +0 -0
  175. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/hospital/env_injection.py +0 -0
  176. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/legal/env_injection.py +0 -0
  177. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/macos/env_injection.py +0 -0
  178. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/os-filesystem/env_injection.py +0 -0
  179. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/paypal/env_injection.py +0 -0
  180. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/research/env_injection.py +0 -0
  181. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/salesforce/env_injection.py +0 -0
  182. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/slack/env_injection.py +0 -0
  183. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/snowflake/env_injection.py +0 -0
  184. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/telecom/env_injection.py +0 -0
  185. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/telegram/env_injection.py +0 -0
  186. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/terminal/env_injection.py +0 -0
  187. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/travel/env_injection.py +0 -0
  188. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/whatsapp/env_injection.py +0 -0
  189. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/windows/env_injection.py +0 -0
  190. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/zoom/env_injection.py +0 -0
  191. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/atlassian/main.py +0 -0
  192. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/atlassian/test_server.py +0 -0
  193. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/bigquery/main.py +0 -0
  194. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/booking/main.py +0 -0
  195. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/browser/main.py +0 -0
  196. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/calendar/example_multi_user.py +0 -0
  197. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/calendar/main.py +0 -0
  198. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/calendar/test_mcp.py +0 -0
  199. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/customer_service/main.py +0 -0
  200. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/databricks/main.py +0 -0
  201. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/databricks/probe.py +0 -0
  202. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/ers/main.py +0 -0
  203. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/__init__.py +0 -0
  204. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/core/__init__.py +0 -0
  205. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/core/data_loader.py +0 -0
  206. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/core/portfolio.py +0 -0
  207. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/evaluation/__init__.py +0 -0
  208. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/evaluation/evaluator.py +0 -0
  209. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/evaluation/logger.py +0 -0
  210. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/__init__.py +0 -0
  211. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/config.py +0 -0
  212. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/content.py +0 -0
  213. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/html.py +0 -0
  214. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/locations.py +0 -0
  215. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/methods.py +0 -0
  216. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/presets.py +0 -0
  217. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/main.py +0 -0
  218. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/run_mcp.py +0 -0
  219. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/run_web.py +0 -0
  220. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/__init__.py +0 -0
  221. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/extractor.py +0 -0
  222. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/extractor_minimal.py +0 -0
  223. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/extractor_simple.py +0 -0
  224. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/injection_mcp.py +0 -0
  225. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/mcp.py +0 -0
  226. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/__init__.py +0 -0
  227. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/account.py +0 -0
  228. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/browsing.py +0 -0
  229. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/social.py +0 -0
  230. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/trading.py +0 -0
  231. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/utility.py +0 -0
  232. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/web.py +0 -0
  233. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +0 -0
  234. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +0 -0
  235. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +0 -0
  236. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +0 -0
  237. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +0 -0
  238. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +0 -0
  239. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +0 -0
  240. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/test_mcp_tools.py +0 -0
  241. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/github/main.py +0 -0
  242. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/gmail/main.py +0 -0
  243. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/google_form/main.py +0 -0
  244. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/googledocs/main.py +0 -0
  245. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/hospital/mcp_server.py +0 -0
  246. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/__init__.py +0 -0
  247. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/__init__.py +0 -0
  248. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/courtlistener_store.py +0 -0
  249. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/data_loader.py +0 -0
  250. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/document_store.py +0 -0
  251. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/matter_manager.py +0 -0
  252. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/main.py +0 -0
  253. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/scripts/collect_data.py +0 -0
  254. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/server/__init__.py +0 -0
  255. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/server/mcp.py +0 -0
  256. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/macos/client_test.py +0 -0
  257. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/macos/mcp_server.py +0 -0
  258. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/os-filesystem/main.py +0 -0
  259. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/paypal/main.py +0 -0
  260. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/research/main.py +0 -0
  261. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/salesforce/main.py +0 -0
  262. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/slack/main.py +0 -0
  263. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/snowflake/main.py +0 -0
  264. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/snowflake/probe.py +0 -0
  265. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/telecom/mcp_client.py +0 -0
  266. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/telecom/mcp_server.py +0 -0
  267. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/telegram/main.py +0 -0
  268. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/terminal/main.py +0 -0
  269. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/travel/client_test.py +0 -0
  270. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/travel/mcp_server.py +0 -0
  271. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/whatsapp/main.py +0 -0
  272. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/windows/client_test.py +0 -0
  273. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/windows/mcp_server.py +0 -0
  274. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/zoom/main.py +0 -0
  275. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/__init__.py +0 -0
  276. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/hooks/__init__.py +0 -0
  277. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/hooks/audit_log.py +0 -0
  278. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/hooks/hooks.json +0 -0
  279. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/run_benign.py +0 -0
  280. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/__init__.py +0 -0
  281. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/agent.py +0 -0
  282. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/attacks.py +0 -0
  283. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/environment.py +0 -0
  284. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/hooks.py +0 -0
  285. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/judge.py +0 -0
  286. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/red_teaming_trajectory.py +0 -0
  287. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/task.py +0 -0
  288. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/trajectory.py +0 -0
  289. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/__init__.py +0 -0
  290. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/atlassian/__init__.py +0 -0
  291. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/atlassian/helpers.py +0 -0
  292. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/bigquery/__init__.py +0 -0
  293. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/bigquery/helpers.py +0 -0
  294. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/calendar/__init__.py +0 -0
  295. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/calendar/helpers.py +0 -0
  296. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/__init__.py +0 -0
  297. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/cs_env_client.py +0 -0
  298. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/helpers.py +0 -0
  299. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/__init__.py +0 -0
  300. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/abusive_comms.py +0 -0
  301. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/bulk_destructive.py +0 -0
  302. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/cross_customer.py +0 -0
  303. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/email_exfiltration.py +0 -0
  304. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/financial_fraud.py +0 -0
  305. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/llm_utils.py +0 -0
  306. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/pii_disclosure.py +0 -0
  307. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/policy_misrep.py +0 -0
  308. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/text_utils.py +0 -0
  309. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/databricks/__init__.py +0 -0
  310. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/databricks/helpers.py +0 -0
  311. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/finance/__init__.py +0 -0
  312. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/finance/helpers.py +0 -0
  313. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/github/__init__.py +0 -0
  314. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/github/helpers.py +0 -0
  315. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/gmail/__init__.py +0 -0
  316. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/gmail/helpers.py +0 -0
  317. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/google_form/__init__.py +0 -0
  318. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/google_form/helpers.py +0 -0
  319. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/legal/__init__.py +0 -0
  320. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/legal/helpers.py +0 -0
  321. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/macos/__init__.py +0 -0
  322. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/macos/env_setup.py +0 -0
  323. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/macos/helpers.py +0 -0
  324. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/os_filesystem/__init__.py +0 -0
  325. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/os_filesystem/helpers.py +0 -0
  326. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/paypal/__init__.py +0 -0
  327. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/paypal/helpers.py +0 -0
  328. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/port_allocator.py +0 -0
  329. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/research/__init__.py +0 -0
  330. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/research/helpers.py +0 -0
  331. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/salesforce/__init__.py +0 -0
  332. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/salesforce/helpers.py +0 -0
  333. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/slack/__init__.py +0 -0
  334. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/slack/helpers.py +0 -0
  335. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/snowflake/__init__.py +0 -0
  336. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/snowflake/helpers.py +0 -0
  337. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telecom/__init__.py +0 -0
  338. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telecom/helpers.py +0 -0
  339. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telegram/__init__.py +0 -0
  340. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telegram/helpers.py +0 -0
  341. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/terminal/__init__.py +0 -0
  342. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/terminal/helpers.py +0 -0
  343. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/__init__.py +0 -0
  344. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/env_client.py +0 -0
  345. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/llm_judge.py +0 -0
  346. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/prompts.py +0 -0
  347. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/utils/__init__.py +0 -0
  348. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/whatsapp/__init__.py +0 -0
  349. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/whatsapp/helpers.py +0 -0
  350. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/__init__.py +0 -0
  351. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/env_reset.py +0 -0
  352. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/env_setup.py +0 -0
  353. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/exfil_helpers.py +0 -0
  354. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/helpers.py +0 -0
  355. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/zoom/__init__.py +0 -0
  356. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/zoom/helpers.py +0 -0
  357. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/eval/__init__.py +0 -0
  358. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/setup.cfg +0 -0
  359. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/__init__.py +0 -0
  360. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/agent_helpers.py +0 -0
  361. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/agent_wrapper.py +0 -0
  362. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/compose_utils.py +0 -0
  363. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/config.py +0 -0
  364. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/env_helpers.py +0 -0
  365. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/eval_stats.py +0 -0
  366. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/injection_helpers.py +0 -0
  367. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/injection_mcp_helpers.py +0 -0
  368. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/judge_helpers.py +0 -0
  369. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/judge_utils.py +0 -0
  370. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/llm.py +0 -0
  371. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/logging.py +0 -0
  372. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/mcp_helpers.py +0 -0
  373. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/mcp_manager.py +0 -0
  374. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/memory_guard.py +0 -0
  375. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/red_teaming_sandbox.py +0 -0
  376. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/reset_helpers.py +0 -0
  377. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/resource_manager.py +0 -0
  378. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/skill_helpers.py +0 -0
  379. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/task_executor.py +0 -0
  380. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/task_helpers.py +0 -0
  381. {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/template_helpers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: decodingtrust-agent-sdk
3
- Version: 0.1.1
3
+ Version: 0.2.1
4
4
  Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
5
5
  Author-email: DTap Team <zhaorun@uchicago.edu>
6
6
  License: Apache License
@@ -12,7 +12,7 @@ from .scaffold import SUPPORTED_FRAMEWORKS, scaffold
12
12
 
13
13
  app = typer.Typer(
14
14
  name="dtap",
15
- help="DecodingTrust Agent Platform: run red-teaming evaluations against AI agents.",
15
+ help="DecodingTrust-Agent Platform (DTap): A Controllable and Interactive Red-Teaming Platform for AI Agents",
16
16
  add_completion=False,
17
17
  no_args_is_help=True,
18
18
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: decodingtrust-agent-sdk
3
- Version: 0.1.1
3
+ Version: 0.2.1
4
4
  Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
5
5
  Author-email: DTap Team <zhaorun@uchicago.edu>
6
6
  License: Apache License
@@ -351,6 +351,7 @@ dt_arena/utils/windows/helpers.py
351
351
  dt_arena/utils/zoom/__init__.py
352
352
  dt_arena/utils/zoom/helpers.py
353
353
  eval/__init__.py
354
+ eval/_ui.py
354
355
  eval/evaluation.py
355
356
  eval/task_runner.py
356
357
  utils/__init__.py
@@ -0,0 +1,339 @@
1
+ """Rich live-progress UI for `dtap eval`.
2
+
3
+ Off when --verbose is set or stdout is not a TTY. The UI consumes structured
4
+ `[DTAP_STATUS]` lines emitted by task_runner.py via stdout-pipe capture.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import shutil
9
+ import subprocess
10
+ import threading
11
+ import time
12
+ from collections import deque
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Dict, Iterable, List, Optional, Tuple
16
+
17
+ import psutil
18
+ from rich.console import Console, Group
19
+ from rich.layout import Layout
20
+ from rich.live import Live
21
+ from rich.panel import Panel
22
+ from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
23
+ from rich.table import Table
24
+ from rich.text import Text
25
+
26
+
27
+ STATUS_PREFIX = "[DTAP_STATUS]"
28
+ _MAX_FAILURES_SHOWN = 5
29
+ _MAX_RUNNING_SHOWN = 12
30
+
31
+
32
+ # ─── Live state ──────────────────────────────────────────────────────────────
33
+
34
@dataclass
class TaskState:
    """Live display state for one task shown in the "Running" panel."""

    task_id: str  # identifier shown in the UI; also the key used in EvalUIState.running
    domain: str  # rendered as "<domain>/<task_id>" in the running table
    environments: Tuple[str, ...]  # environment names the task was registered with
    start_time: float  # time.monotonic() reading taken when the task was registered
    phase: str = "queued"  # queued | loading | running | judging | done
    turn: int = 0  # current turn as reported by a status line (0 until reported)
    total_turns: int = 0  # total turns as reported by a status line (0 means unknown)
    log_path: Optional[Path] = None  # captured-output log, listed under "Recent failures"
44
+
45
+
46
@dataclass
class EvalUIState:
    """Shared, lock-protected state behind the live evaluation display.

    The eval loop mutates this object through ``start_task``,
    ``update_status`` and ``finish_task``; the rendering helpers read it.
    All mutation helpers take ``_lock`` because status lines arrive from
    background reader tasks.
    """

    # Static run description (shown in the header panel).
    total_tasks: int
    agent_type: str
    model: str
    max_parallel: int
    filters: Dict[str, Optional[str]]
    start_time: float = field(default_factory=time.monotonic)

    # Live counters and per-task state.
    running: Dict[str, "TaskState"] = field(default_factory=dict)
    succeeded: int = 0
    failed: int = 0
    # Bounded history of (task_id, log_path) pairs for the failures panel.
    recent_failures: deque = field(default_factory=lambda: deque(maxlen=_MAX_FAILURES_SHOWN))
    # Wall-clock duration (seconds) of every finished task that was tracked.
    durations: List[float] = field(default_factory=list)

    _lock: threading.Lock = field(default_factory=threading.Lock)

    # mutation helpers — thread-safe (status lines arrive on background reader tasks)

    @staticmethod
    def _as_int(raw: Optional[str]) -> Optional[int]:
        """Return ``raw`` parsed as an int, or None if it is missing or malformed."""
        try:
            return int(raw)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return None

    def start_task(self, task_id: str, domain: str, environments: Iterable[str],
                   log_path: Optional[Path]) -> None:
        """Register ``task_id`` as running, stamping its start time."""
        with self._lock:
            self.running[task_id] = TaskState(
                task_id=task_id,
                domain=domain,
                environments=tuple(environments),
                start_time=time.monotonic(),
                log_path=log_path,
            )

    def update_status(self, task_id: str, fields: Dict[str, str]) -> None:
        """Apply parsed status-line fields to a running task.

        Recognised keys are ``phase``, ``turn``, ``total_turns`` and ``turns``
        (the latter only fills ``total_turns`` while it is still unknown).
        Unknown tasks are ignored. Numeric values come from subprocess output,
        so values that are not valid integers are skipped instead of raising
        into the caller's output-reading loop.
        """
        with self._lock:
            st = self.running.get(task_id)
            if st is None:
                return
            if "phase" in fields:
                st.phase = fields["phase"]

            turn = self._as_int(fields.get("turn"))
            if turn is not None:
                st.turn = turn

            total = self._as_int(fields.get("total_turns"))
            if total is not None:
                st.total_turns = total

            if not st.total_turns:
                fallback_total = self._as_int(fields.get("turns"))
                if fallback_total is not None:
                    st.total_turns = fallback_total

    def finish_task(self, task_id: str, success: bool) -> None:
        """Remove ``task_id`` from the running set and update the counters.

        A duration is recorded only when the task was actually tracked, so
        untracked tasks cannot drag the displayed average towards zero.
        Failures are always remembered; the log path is None when unknown.
        """
        with self._lock:
            st = self.running.pop(task_id, None)
            if st is not None:
                self.durations.append(time.monotonic() - st.start_time)
            if success:
                self.succeeded += 1
            else:
                self.failed += 1
                self.recent_failures.append((task_id, st.log_path if st is not None else None))

    @property
    def completed(self) -> int:
        """Number of tasks that have finished, successfully or not."""
        return self.succeeded + self.failed
105
+
106
+
107
+ # ─── Status-line parsing ─────────────────────────────────────────────────────
108
+
109
def parse_status_line(line: str) -> Optional[Dict[str, str]]:
    """Turn a '[DTAP_STATUS] k=v k=v' line into a dict of its fields.

    Returns None when ``line`` does not start with the status prefix.
    Whitespace-separated tokens without '=' are ignored; each remaining token
    is split on its first '=' and a repeated key keeps the last value.
    """
    if not line.startswith(STATUS_PREFIX):
        return None
    payload = line[len(STATUS_PREFIX):].strip()
    pairs = (token.split("=", 1) for token in payload.split() if "=" in token)
    return {key: value for key, value in pairs}
120
+
121
+
122
+ # ─── Rendering ───────────────────────────────────────────────────────────────
123
+
124
def _format_duration(secs: float) -> str:
    """Format a duration in seconds as 'MM:SS', or 'H:MM:SS' from one hour up.

    Fractions are truncated and negative inputs are clamped to zero.
    """
    whole = int(secs)
    if whole < 0:
        whole = 0
    minutes, seconds = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{hours:d}:{minutes:02d}:{seconds:02d}"
    return f"{minutes:02d}:{seconds:02d}"
131
+
132
+
133
def _docker_container_count() -> Optional[int]:
    """Return the number of running Docker containers, or None if unknown.

    The count comes from ``docker ps --quiet`` (one container ID per line).
    The resources panel calls this on every render, which happens several
    times per second, so the last result is kept on the function object for a
    short time instead of spawning a new ``docker`` process for each frame.

    Returns:
        The container count, or None when the ``docker`` executable is not on
        PATH, the command exits non-zero, or running it fails or times out.
    """
    ttl_secs = 2.0
    cached = getattr(_docker_container_count, "_cached", None)
    if cached is not None and time.monotonic() - cached[0] < ttl_secs:
        return cached[1]

    count: Optional[int] = None
    if shutil.which("docker") is not None:
        try:
            out = subprocess.run(
                ["docker", "ps", "--quiet"],
                capture_output=True, text=True, timeout=2,
            )
            if out.returncode == 0:
                count = sum(1 for line in out.stdout.splitlines() if line.strip())
        except Exception:
            # Best-effort metric: any failure is simply displayed as "n/a".
            count = None

    # Stamp after the call so a slow `docker ps` does not shorten the cache window.
    _docker_container_count._cached = (time.monotonic(), count)
    return count
146
+
147
+
148
def _phase_chip(phase: str, turn: int, total_turns: int) -> Text:
    """Build the coloured '[phase]' badge for one running task.

    While a task is in the "running" phase and its total turn count is known,
    the badge reads 'turn <n>/<total>' (an unreported turn is shown as 1).
    Phases without a dedicated colour are drawn in white.
    """
    palette = dict(
        queued="dim",
        loading="yellow",
        running="cyan",
        judging="magenta",
        done="green",
    )
    style = palette[phase] if phase in palette else "white"
    if phase == "running" and total_turns:
        caption = f"turn {turn or 1}/{total_turns}"
    else:
        caption = phase
    return Text(f"[{caption}]", style=style)
160
+
161
+
162
def _header_panel(state: EvalUIState) -> Panel:
    """Render the title panel: agent type, model, parallelism and active filters.

    Filters whose value is falsy are left out; when none remain, the filter
    segment is omitted entirely.
    """
    active = [f"{name}={value}" for name, value in state.filters.items() if value]
    filter_str = " · ".join(active)

    parts = [
        (state.agent_type, "bold cyan"),
        " · ",
        (state.model, "bold"),
        f" · max_parallel={state.max_parallel}",
    ]
    if filter_str:
        parts.extend([" · ", (filter_str, "dim")])

    banner = Text.assemble(*parts)
    return Panel(banner, title="DTap evaluation", title_align="left", border_style="cyan")
174
+
175
+
176
def _progress_panel(state: EvalUIState, progress: Progress, task_id: int) -> Panel:
    """Render the overall progress bar plus a one-line summary.

    Side effect: pushes the current completed count into ``progress`` before
    rendering. The summary shows success/failure counts, the mean duration of
    finished tasks (0.0 while none have finished) and the elapsed time.
    """
    progress.update(task_id, completed=state.completed)
    wall_clock = time.monotonic() - state.start_time

    samples = state.durations
    mean_secs = sum(samples) / len(samples) if samples else 0.0
    failed_style = "red" if state.failed else "dim"

    stats_line = Text.assemble(
        ("Succeeded: ", "bold"),
        (str(state.succeeded), "green"),
        " Failed: ",
        (str(state.failed), failed_style),
        " Avg: ",
        (f"{mean_secs:0.1f}s", "cyan"),
        " Elapsed: ",
        (_format_duration(wall_clock), "cyan"),
    )
    body = Group(progress, stats_line)
    return Panel(body, title="Progress", title_align="left", border_style="white")
191
+
192
+
193
def _running_panel(state: EvalUIState) -> Panel:
    """Render the table of currently running tasks, oldest first.

    At most ``_MAX_RUNNING_SHOWN`` rows are listed; any remainder is collapsed
    into a '... +N more' row, and an empty set is shown as '(idle)'. The panel
    title reports running tasks against ``state.max_parallel``.
    """
    # Snapshot under the lock so rows are built from a stable list.
    with state._lock:
        items = list(state.running.values())
    items.sort(key=lambda t: t.start_time)

    table = Table.grid(padding=(0, 1))
    table.add_column(width=3, no_wrap=True)
    table.add_column(no_wrap=True)
    table.add_column(no_wrap=True)
    table.add_column(no_wrap=True, justify="right")

    now = time.monotonic()
    for st in items[:_MAX_RUNNING_SHOWN]:
        table.add_row(
            "▸",
            Text(f"{st.domain}/{st.task_id}", style="bold"),
            _phase_chip(st.phase, st.turn, st.total_turns),
            Text(_format_duration(now - st.start_time), style="dim"),
        )
    if len(items) > _MAX_RUNNING_SHOWN:
        table.add_row("", Text(f"... +{len(items) - _MAX_RUNNING_SHOWN} more", style="dim"), "", "")
    if not items:
        table.add_row("", Text("(idle)", style="dim"), "", "")
    return Panel(
        table,
        title=f"Running ({len(items)}/{state.max_parallel})",
        title_align="left",
        border_style="cyan",
    )
223
+
224
+
225
def _resources_panel(state: EvalUIState, port_provider=None) -> Panel:
    """Render host resource usage: Docker containers, CPU, memory and ports.

    Args:
        state: Unused here; accepted so all panel builders share a signature.
        port_provider: Optional zero-argument callable returning the ports
            currently in use. Only the 12 lowest ports are listed; the rest
            are summarised as '(+N)'.
    """
    cpu_pct = psutil.cpu_percent(interval=None)
    memory = psutil.virtual_memory()
    gib = 1024 ** 3
    mem_used_gb = (memory.total - memory.available) / gib
    mem_total_gb = memory.total / gib

    container_count = _docker_container_count()
    docker_str = "n/a" if container_count is None else f"{container_count}"

    ports = port_provider() if port_provider else []
    limit = 12
    hidden = len(ports) - limit
    if hidden > 0:
        listed = sorted(ports)[:limit]
        port_str = ", ".join(map(str, listed)) + f", … (+{hidden})"
    elif ports:
        port_str = ", ".join(map(str, sorted(ports)))
    else:
        port_str = "—"

    usage_line = Text.assemble(
        ("Docker: ", "bold"), (f"{docker_str} containers ", "white"),
        ("CPU: ", "bold"), (f"{cpu_pct:0.0f}% ", "white"),
        ("Mem: ", "bold"), (f"{mem_used_gb:0.1f}/{mem_total_gb:0.0f} GB", "white"),
    )
    ports_line = Text.assemble(("Ports in use: ", "bold"), (port_str, "dim"))
    return Panel(
        Group(usage_line, ports_line),
        title="Resources",
        title_align="left",
        border_style="magenta",
    )
247
+
248
+
249
def _failures_panel(state: EvalUIState) -> Optional[Panel]:
    """Render the most recent failures with their log paths.

    Returns None when nothing has failed yet, so the caller can leave the
    section out of the layout. A failure without a log is shown as '(no log)'.
    """
    with state._lock:
        snapshot = tuple(state.recent_failures)
    if len(snapshot) == 0:
        return None

    grid = Table.grid(padding=(0, 1))
    grid.add_column(no_wrap=True)
    grid.add_column(no_wrap=False)
    for failed_id, log_file in snapshot:
        shown_log = "(no log)" if not log_file else str(log_file)
        grid.add_row(Text(failed_id, style="red"), Text(shown_log, style="dim"))
    return Panel(grid, title="Recent failures", title_align="left", border_style="red")
261
+
262
+
263
+ # ─── LiveProgress controller ─────────────────────────────────────────────────
264
+
265
class LiveProgress:
    """Owns the Rich Live layout and exposes mutation hooks for the eval loop.

    Use as a context manager: entering starts the live display and a daemon
    thread that re-renders it every 0.25 s (so elapsed time and resource
    figures keep ticking without events); exiting stops the thread, draws one
    final frame and shuts the display down. Callers mutate ``self.state``.
    """

    def __init__(
        self,
        *,
        total_tasks: int,
        agent_type: str,
        model: str,
        max_parallel: int,
        filters: Dict[str, Optional[str]],
        port_provider=None,
        console: Optional[Console] = None,
    ) -> None:
        """Build the UI state and progress bar; nothing is drawn until entered.

        Args:
            total_tasks: Number of tasks the overall progress bar counts to.
            agent_type: Agent type shown in the header.
            model: Model name shown in the header.
            max_parallel: Parallelism limit shown in the header and the
                running-panel title.
            filters: Filter name to value; falsy values are not displayed.
            port_provider: Optional zero-argument callable returning the
                ports currently in use, for the resources panel.
            console: Rich console to draw on; a new one is created if omitted.
        """
        self.state = EvalUIState(
            total_tasks=total_tasks,
            agent_type=agent_type,
            model=model,
            max_parallel=max_parallel,
            filters=filters,
        )
        self._console = console or Console()
        self._port_provider = port_provider
        self._progress = Progress(
            TextColumn("[bold blue]{task.percentage:>5.1f}%"),
            BarColumn(bar_width=None),
            TextColumn("{task.completed}/{task.total}"),
            TextColumn("ETA"),
            TimeRemainingColumn(),
            expand=True,
        )
        self._task_id = self._progress.add_task("overall", total=total_tasks)
        self._live: Optional[Live] = None
        # Created here so __exit__ is safe even if __enter__ never ran.
        self._stop = threading.Event()
        self._refresher: Optional[threading.Thread] = None

    def __enter__(self) -> "LiveProgress":
        """Start the live display and the background refresh thread."""
        self._live = Live(
            self._render(),
            console=self._console,
            refresh_per_second=4,
            transient=False,
        )
        self._live.__enter__()
        # Kick off a refresh thread so the elapsed-time / resources tick even
        # when no events arrive. A fresh Event allows the object to be re-entered.
        self._stop = threading.Event()
        self._refresher = threading.Thread(target=self._tick, daemon=True)
        self._refresher.start()
        return self

    def __exit__(self, *exc) -> None:
        """Stop refreshing, draw a final frame and tear the display down."""
        self._stop.set()
        if self._refresher is not None:
            self._refresher.join(timeout=2)
        if self._live is None:
            return
        try:
            self._live.update(self._render())
        finally:
            # Always leave live mode, even if the final render fails, so the
            # terminal is not left in the live-display state.
            self._live.__exit__(*exc)

    def _tick(self) -> None:
        """Refresh loop run on the background thread until ``_stop`` is set."""
        while not self._stop.is_set():
            if self._live is not None:
                self._live.update(self._render())
            self._stop.wait(0.25)

    def _render(self) -> Layout:
        """Assemble the full layout from the current state.

        Sections, top to bottom: header, progress, running tasks, resources
        and, once at least one task has failed, recent failures.
        """
        layout = Layout()
        sections = [
            Layout(_header_panel(self.state), size=3, name="header"),
            Layout(_progress_panel(self.state, self._progress, self._task_id), size=4, name="progress"),
            Layout(_running_panel(self.state), name="running"),
            Layout(_resources_panel(self.state, self._port_provider), size=4, name="resources"),
        ]
        failures = _failures_panel(self.state)
        if failures is not None:
            # One row per failure plus the panel border, capped at 8 lines.
            height = min(len(self.state.recent_failures) + 2, 8)
            sections.append(Layout(failures, size=height, name="failures"))
        layout.split_column(*sections)
        return layout
@@ -4,7 +4,7 @@ import os
4
4
  import sys
5
5
  import time
6
6
  from pathlib import Path
7
- from typing import Dict, List, Tuple
7
+ from typing import Dict, List, Optional, Tuple
8
8
 
9
9
  from utils import (
10
10
  PROJECT_ROOT,
@@ -18,6 +18,8 @@ from utils.memory_guard import check_memory_before_launch
18
18
  from utils.task_executor import TaskExecutor, ScheduledTask, EnvInstance, get_task_environments
19
19
  from utils.eval_stats import TaskTiming, EvaluationStats
20
20
 
21
+ from eval._ui import LiveProgress, parse_status_line
22
+
21
23
 
22
24
  def _build_scheduled_tasks(task_specs: List[TaskSpec]) -> List[ScheduledTask]:
23
25
  """Convert TaskSpec list to ScheduledTask list with environment requirements."""
@@ -37,49 +39,65 @@ def _build_scheduled_tasks(task_specs: List[TaskSpec]) -> List[ScheduledTask]:
37
39
  return scheduled
38
40
 
39
41
 
42
def _short_task_id(task: ScheduledTask) -> str:
    """Return a compact task label for the UI.

    The label is '<risk_category>/<task_id>' when both attributes are set;
    otherwise it falls back to the name of the task directory.
    """
    if not (task.risk_category and task.task_id):
        return task.task_dir.name
    return f"{task.risk_category}/{task.task_id}"
47
+
48
+
49
def _task_log_path(args: argparse.Namespace, task: ScheduledTask) -> Path:
    """Return the file that receives this task's captured subprocess output.

    The path is
    ``<root>/<result_type>/<agent_type>/<model>/<dataset_path>/task.log``,
    where ``<root>`` is ``$EVAL_RESULTS_ROOT`` or ``./results``, and '/' and
    ':' in the model name are replaced by '_'. The directory is created if it
    does not exist yet.
    """
    fallback_root = str(Path.cwd() / "results")
    results_root = Path(os.environ.get("EVAL_RESULTS_ROOT", fallback_root))
    dataset_path = extract_dataset_path(task.task_dir)
    model_dir = args.model.replace("/", "_").replace(":", "_")
    if args.direct_prompt:
        result_type = "direct_prompt"
    else:
        result_type = "benchmark"

    target_dir = results_root / result_type / args.agent_type / model_dir / dataset_path
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir / "task.log"
58
+
59
+
40
60
  async def _run_single_task_subprocess(
41
61
  task: ScheduledTask,
42
62
  instances: Dict[str, EnvInstance],
43
63
  args: argparse.Namespace,
44
64
  base_env: Dict[str, str],
65
+ ui: Optional[LiveProgress] = None,
45
66
  ) -> int:
46
- """
47
- Invoke task_runner.py as a subprocess for a single task.
67
+ """Invoke task_runner.py as a subprocess for one task.
68
+
69
+ One code path regardless of UI mode:
70
+ - subprocess stdout/stderr are always piped, written to <out>/task.log,
71
+ and scanned for [DTAP_STATUS] lines feeding the UI state.
72
+ - --verbose additionally echoes each captured line to the terminal.
48
73
 
49
- The Docker environments are already running (managed by TaskExecutor).
50
- We pass the port mappings via environment variables so task_runner
51
- can skip Docker startup and use the existing instances.
74
+ Docker environments are managed by TaskExecutor; we pass port mappings
75
+ via env vars so task_runner skips its own Docker startup.
52
76
  """
53
77
  env = base_env.copy()
78
+ env["PYTHONUNBUFFERED"] = "1" # keep per-line latency low
54
79
 
55
- # Ensure PYTHONPATH includes project root
80
+ # PYTHONPATH must include the project root for the subprocess
56
81
  pythonpath = env.get("PYTHONPATH", "")
57
82
  if str(PROJECT_ROOT) not in pythonpath:
58
83
  env["PYTHONPATH"] = f"{PROJECT_ROOT}:{pythonpath}" if pythonpath else str(PROJECT_ROOT)
59
84
 
60
85
  # Pass port mappings and project names from instances
61
- print(f"[DEBUG] Instances: {list(instances.keys())}", flush=True)
62
86
  for instance in instances.values():
63
- print(f"[DEBUG] Instance {instance.env_name} ports: {instance.ports}", flush=True)
64
87
  for var_name, port in instance.ports.items():
65
88
  env[var_name] = str(port)
66
- # Pass project name so env_seed.py can use it for docker commands
67
89
  env_name_upper = instance.env_name.upper().replace("-", "_")
68
90
  env[f"{env_name_upper}_PROJECT_NAME"] = instance.project_name
69
91
 
70
- # Port range for MCP servers (allocated per-task)
71
92
  if args.port_range:
72
93
  env["DT_PORT_RANGE"] = args.port_range
73
94
 
74
95
  cmd = [
75
96
  sys.executable,
76
97
  str(TASK_RUNNER_PATH),
77
- "--task-dir",
78
- str(task.task_dir),
79
- "--agent-type",
80
- args.agent_type,
81
- "--model",
82
- args.model,
98
+ "--task-dir", str(task.task_dir),
99
+ "--agent-type", args.agent_type,
100
+ "--model", args.model,
83
101
  ]
84
102
  if args.skip_mcp:
85
103
  cmd.append("--skip-mcp")
@@ -94,14 +112,54 @@ async def _run_single_task_subprocess(
94
112
  if args.disallowed_tools:
95
113
  cmd.extend(["--disallowed-tools"] + args.disallowed_tools)
96
114
 
97
- print(f"[EVAL] Starting task: {task.task_dir} (envs={list(task.environments)})", flush=True)
115
+ short_id = _short_task_id(task)
116
+ log_path = _task_log_path(args, task)
117
+
118
+ if ui is not None:
119
+ ui.state.start_task(
120
+ task_id=short_id,
121
+ domain=task.domain or "?",
122
+ environments=task.environments,
123
+ log_path=log_path,
124
+ )
125
+ if args.verbose:
126
+ sys.stdout.write(
127
+ f"[EVAL] {short_id} envs={list(task.environments)} log={log_path}\n"
128
+ )
129
+ sys.stdout.flush()
130
+
98
131
  proc = await asyncio.create_subprocess_exec(
99
132
  *cmd,
100
133
  cwd=str(PROJECT_ROOT),
101
134
  env=env,
135
+ stdout=asyncio.subprocess.PIPE,
136
+ stderr=asyncio.subprocess.STDOUT, # merge so the log has the full timeline
102
137
  )
138
+ assert proc.stdout is not None
139
+
140
+ with log_path.open("wb") as logf:
141
+ async for line in proc.stdout:
142
+ logf.write(line)
143
+ logf.flush()
144
+
145
+ text = line.decode(errors="replace").rstrip()
146
+
147
+ # Status lines drive the UI state regardless of display mode.
148
+ fields = parse_status_line(text)
149
+ if fields and ui is not None:
150
+ ui.state.update_status(short_id, fields)
151
+
152
+ # Verbose mode echoes raw output to the terminal.
153
+ if args.verbose:
154
+ sys.stdout.write(text + "\n")
155
+ sys.stdout.flush()
156
+
103
157
  rc = await proc.wait()
104
- print(f"[EVAL] Finished task: {task.task_dir} (rc={rc})")
158
+ if ui is not None:
159
+ ui.state.finish_task(short_id, success=(rc == 0))
160
+ if args.verbose:
161
+ sys.stdout.write(f"[EVAL] {short_id} finished (rc={rc})\n")
162
+ sys.stdout.flush()
105
163
  return rc
106
164
 
107
165
 
@@ -223,6 +281,31 @@ async def _run_all_tasks(args: argparse.Namespace) -> int:
223
281
  base_env = os.environ.copy()
224
282
  task_timings: Dict[str, TaskTiming] = {}
225
283
 
284
+ # Decide between Rich live UI vs raw passthrough.
285
+ # Live UI is the default; --verbose or non-TTY falls back to legacy logs.
286
+ use_live_ui = (not args.verbose) and sys.stdout.isatty()
287
+
288
+ def _ports_in_use() -> list:
289
+ try:
290
+ ports: set = set()
291
+ for inst in getattr(executor, "_all_instances", {}).values():
292
+ for p in (inst.ports or {}).values():
293
+ ports.add(int(p))
294
+ return sorted(ports)
295
+ except Exception:
296
+ return []
297
+
298
+ ui_ctx: Optional[LiveProgress] = None
299
+ if use_live_ui:
300
+ ui_ctx = LiveProgress(
301
+ total_tasks=len(scheduled_tasks),
302
+ agent_type=args.agent_type,
303
+ model=args.model,
304
+ max_parallel=args.max_parallel,
305
+ filters=filter_items,
306
+ port_provider=_ports_in_use,
307
+ )
308
+
226
309
  async def run_task(task: ScheduledTask, instances: Dict[str, EnvInstance]) -> int:
227
310
  """Wrapper to run a task and track timing."""
228
311
  task_name = task.task_dir.name
@@ -230,11 +313,12 @@ async def _run_all_tasks(args: argparse.Namespace) -> int:
230
313
  timing.start_time = time.time()
231
314
 
232
315
  try:
233
- rc = await _run_single_task_subprocess(task, instances, args, base_env)
316
+ rc = await _run_single_task_subprocess(task, instances, args, base_env, ui=ui_ctx)
234
317
  timing.success = (rc == 0)
235
318
  return rc
236
319
  except Exception as e:
237
- print(f"[EVAL] Error running task {task_name}: {e}", flush=True)
320
+ if not use_live_ui:
321
+ print(f"[EVAL] Error running task {task_name}: {e}", flush=True)
238
322
  timing.success = False
239
323
  return 1
240
324
  finally:
@@ -243,9 +327,13 @@ async def _run_all_tasks(args: argparse.Namespace) -> int:
243
327
  stats.task_timings.append(timing)
244
328
 
245
329
  try:
246
- # Run all tasks
247
- print(f"\n[EVAL] Starting evaluation...", flush=True)
248
- results = await executor.run_all(scheduled_tasks, run_task)
330
+ if not use_live_ui:
331
+ print(f"\n[EVAL] Starting evaluation...", flush=True)
332
+ if ui_ctx is not None:
333
+ with ui_ctx:
334
+ results = await executor.run_all(scheduled_tasks, run_task)
335
+ else:
336
+ results = await executor.run_all(scheduled_tasks, run_task)
249
337
 
250
338
  finally:
251
339
  # Record end time
@@ -387,6 +475,15 @@ def main() -> None:
387
475
  action="store_true",
388
476
  help="Enable debug mode to save extra info like tool descriptions in trajectory.",
389
477
  )
478
+ parser.add_argument(
479
+ "--verbose",
480
+ action="store_true",
481
+ help=(
482
+ "Stream raw task_runner stdout/stderr to the terminal. "
483
+ "Without this, dtap eval shows a live progress UI and writes "
484
+ "per-task logs to <results>/.../task.log."
485
+ ),
486
+ )
390
487
  parser.add_argument(
391
488
  "--direct-prompt",
392
489
  action="store_true",
@@ -35,6 +35,16 @@ from dt_arena.src.types.agent import AgentConfig, RuntimeConfig
35
35
  from dt_arena.src.types.task import AttackConfig, TaskConfig
36
36
 
37
37
 
38
+ def _status(**fields: object) -> None:
39
+ """Emit a machine-readable status line consumed by the dtap eval progress UI.
40
+
41
+ Format: [DTAP_STATUS] key1=val1 key2=val2 ...
42
+ Always flushes so the parent gets near-real-time updates.
43
+ """
44
+ parts = " ".join(f"{k}={v}" for k, v in fields.items())
45
+ print(f"[DTAP_STATUS] {parts}", flush=True)
46
+
47
+
38
48
  async def run_single_task(
39
49
  task_dir: Path,
40
50
  *,
@@ -68,6 +78,7 @@ async def run_single_task(
68
78
  print(f"[ERROR] Configuration file not found: {config_path}")
69
79
  return 1
70
80
 
81
+ _status(phase="loading")
71
82
  print(f"[INFO] Loading agent config from: {config_path}")
72
83
  print("-" * 80)
73
84
 
@@ -241,6 +252,7 @@ async def run_single_task(
241
252
  print(attack_cfg.malicious_goal.strip())
242
253
  print("-" * 80)
243
254
 
255
+ _status(phase="running", turns=len(user_instruction) if isinstance(user_instruction, list) else 1)
244
256
  # Run agent with per-turn env injection support
245
257
  async with agent:
246
258
  metadata = {
@@ -281,6 +293,7 @@ async def run_single_task(
281
293
  print(f" Error: {inj_result['error']}")
282
294
 
283
295
  # Run agent for this turn
296
+ _status(turn=turn_id, total_turns=len(instructions))
284
297
  print(f"\n[TURN {turn_id}] Processing query...")
285
298
  try:
286
299
  result = await agent.run(turn_instruction, metadata=metadata)
@@ -315,6 +328,7 @@ async def run_single_task(
315
328
 
316
329
  # Run judge
317
330
  if not skip_judge:
331
+ _status(phase="judging")
318
332
  judge_result = run_judge(task_dir, response=final_output)
319
333
  print_judge_result(judge_result)
320
334
 
@@ -325,10 +339,12 @@ async def run_single_task(
325
339
  print(f"[INFO] Judge result saved to: {judge_output_path}")
326
340
 
327
341
  if judge_result.get("task_success") is False:
342
+ _status(phase="done", success=0)
328
343
  return 1
329
344
  else:
330
345
  print("[JUDGE] Skipped judge as requested.")
331
346
 
347
+ _status(phase="done", success=1)
332
348
  return 0
333
349
 
334
350
  except subprocess.CalledProcessError as e:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "decodingtrust-agent-sdk"
7
- version = "0.1.1"
7
+ version = "0.2.1"
8
8
  description = "DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }