decodingtrust-agent-sdk 0.2.8__tar.gz → 0.2.10__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {decodingtrust_agent_sdk-0.2.8/decodingtrust_agent_sdk.egg-info → decodingtrust_agent_sdk-0.2.10}/PKG-INFO +79 -15
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/README.md +71 -14
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/example.py +1 -1
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/hermes_runner.py +1 -1
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/example.py +2 -2
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/example.py +2 -2
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/example.py +2 -2
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/example.py +1 -1
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/cli/scaffold.py +2 -2
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10/decodingtrust_agent_sdk.egg-info}/PKG-INFO +79 -15
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/requires.txt +8 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/mcp.yaml +7 -1
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/browser/main.py +16 -3
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/agent.py +2 -2
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/evaluation.py +31 -11
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/task_runner.py +2 -2
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/pyproject.toml +19 -1
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/__init__.py +6 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/agent_helpers.py +47 -63
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/config.py +57 -7
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/judge_helpers.py +28 -11
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/LICENSE +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/MANIFEST.in +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/example.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/mcp_proxy.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/claudesdk/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/example.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/mcp_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/googleadk/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/mcp_proxy.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/langchain/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/agent_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/mcp_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openaisdk/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/example.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/helpers/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/helpers/auth_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/mcp_proxy.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/plugin_generator.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/openclaw/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/prompts/react_agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/agent_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/async_helper.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/mcp_react_agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/native_agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/pocketflow/src/nodes.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/strands/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/browser/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/code/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/crm/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/customer-service/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/finance/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/legal/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/macos/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/medical/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/os-filesystem/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/research/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/telecom/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/travel/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/windows/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/benchmark/workflow/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/cli/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/cli/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/SOURCES.txt +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/dependency_links.txt +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/entry_points.txt +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/decodingtrust_agent_sdk.egg-info/top_level.txt +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/a2a_agents/generic/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/a2a_agents/generic/agent.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/env.yaml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/config/injection_mcp.yaml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/arxiv/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/arxiv/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/atlassian/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/atlassian/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/bigquery/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/booking/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/calendar/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/calendar/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/custom-website/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/customer_service/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/databricks/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/databricks/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/ecommerce/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/ers/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/finance/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/gmail/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/gmail/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/google-form/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/google-form/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/googledocs/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/googledocs/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/hospital/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/hospital/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/legal/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/linkedin/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/macos/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/os-filesystem/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/os-filesystem/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/paypal/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/paypal/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/research/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/research/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/salesforce_crm/docker-compose.yaml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/slack/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/slack/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/snowflake/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/snowflake/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telecom/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telecom/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telegram/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/telegram/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/terminal/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/terminal/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/travel/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/travel/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/whatsapp/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/whatsapp/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/dns_listener.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/exfil_listener.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/ftp_listener.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/reset_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/windows/validate.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/zoom/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/envs/zoom/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/atlassian/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/calendar/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/custom_website/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/customer_service/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/databricks/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/ecommerce/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/finance/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/github/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/gmail/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/google_form/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/googledocs/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/hospital/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/legal/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/macos/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/os-filesystem/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/paypal/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/research/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/salesforce/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/slack/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/snowflake/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/telecom/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/telegram/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/terminal/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/travel/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/whatsapp/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/windows/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/injection_mcp_server/zoom/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/atlassian/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/atlassian/test_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/bigquery/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/booking/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/example_multi_user.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/calendar/test_mcp.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/customer_service/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/databricks/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/databricks/probe.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/ers/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/data_loader.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/core/portfolio.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/evaluator.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/evaluation/logger.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/config.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/content.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/html.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/locations.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/methods.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/injection/presets.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/run_mcp.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/run_web.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor_minimal.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/extractor_simple.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/injection_mcp.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/mcp.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/account.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/browsing.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/social.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/trading.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/tools/utility.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/server/web.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/finance/test_mcp_tools.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/github/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/gmail/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/google_form/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/googledocs/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/hospital/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/courtlistener_store.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/data_loader.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/document_store.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/core/matter_manager.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/scripts/collect_data.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/server/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/legal/server/mcp.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/macos/client_test.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/macos/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/os-filesystem/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/paypal/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/research/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/salesforce/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/slack/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/snowflake/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/snowflake/probe.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telecom/mcp_client.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telecom/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/telegram/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/terminal/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/travel/client_test.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/travel/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/whatsapp/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/windows/client_test.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/windows/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/mcp_server/zoom/main.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/audit_log.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/hooks/hooks.json +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/run_benign.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/attacks.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/environment.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/hooks.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/judge.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/red_teaming_trajectory.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/task.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/src/types/trajectory.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/atlassian/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/atlassian/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/bigquery/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/bigquery/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/calendar/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/calendar/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/chase/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/chase/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/cs_env_client.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/abusive_comms.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/bulk_destructive.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/cross_customer.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/email_exfiltration.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/financial_fraud.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/llm_utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/pii_disclosure.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/policy_misrep.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/customer_service/judges/text_utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/databricks/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/databricks/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/finance/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/finance/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/github/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/github/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/gmail/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/gmail/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/google_form/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/google_form/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/legal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/legal/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/env_setup.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/macos/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/os_filesystem/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/os_filesystem/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/paypal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/paypal/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/port_allocator.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/research/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/research/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/robinhood/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/robinhood/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/salesforce/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/salesforce/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/slack/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/slack/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/snowflake/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/snowflake/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telecom/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telecom/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telegram/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/telegram/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/terminal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/terminal/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/env_client.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/llm_judge.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/travel/prompts.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/utils/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/whatsapp/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/whatsapp/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/env_reset.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/env_setup.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/exfil_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/windows/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/zoom/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/dt_arena/utils/zoom/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/eval/_ui.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/setup.cfg +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/a2a_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/agent_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/compose_utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/env_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/eval_stats.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/injection_a2a_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/injection_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/injection_mcp_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/judge_utils.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/llm.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/logging.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/mcp_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/mcp_manager.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/memory_guard.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/red_teaming_sandbox.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/reset_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/resource_manager.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/skill_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/task_executor.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/task_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/utils/template_helpers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: decodingtrust-agent-sdk
|
|
3
|
-
Version: 0.2.8
|
|
3
|
+
Version: 0.2.10
|
|
4
4
|
Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
|
|
5
5
|
Author-email: DTap Team <zhaorun@uchicago.edu>
|
|
6
6
|
License: Apache License
|
|
@@ -245,6 +245,8 @@ Requires-Dist: rich>=13.0.0
|
|
|
245
245
|
Requires-Dist: pandas>=2.0.0
|
|
246
246
|
Requires-Dist: psutil>=5.9.0
|
|
247
247
|
Requires-Dist: huggingface_hub>=0.20.0
|
|
248
|
+
Requires-Dist: playwright>=1.53
|
|
249
|
+
Requires-Dist: pillow>=10.0.0
|
|
248
250
|
Provides-Extra: openai
|
|
249
251
|
Requires-Dist: openai>=2.6.1; extra == "openai"
|
|
250
252
|
Requires-Dist: openai-agents>=0.8.4; extra == "openai"
|
|
@@ -260,6 +262,10 @@ Requires-Dist: google-generativeai>=0.3.0; extra == "google"
|
|
|
260
262
|
Requires-Dist: google-genai>=1.0.0; extra == "google"
|
|
261
263
|
Requires-Dist: google-api-core>=2.28.0; extra == "google"
|
|
262
264
|
Requires-Dist: google-api-python-client>=2.100.0; extra == "google"
|
|
265
|
+
Requires-Dist: google-cloud-monitoring>=2.20.0; extra == "google"
|
|
266
|
+
Requires-Dist: google-cloud-trace>=1.13.0; extra == "google"
|
|
267
|
+
Requires-Dist: opentelemetry-exporter-gcp-trace>=1.7.0; extra == "google"
|
|
268
|
+
Requires-Dist: opentelemetry-exporter-gcp-monitoring>=1.7.0a0; extra == "google"
|
|
263
269
|
Provides-Extra: strands
|
|
264
270
|
Requires-Dist: strands-agents>=1.40.0; extra == "strands"
|
|
265
271
|
Provides-Extra: langchain
|
|
@@ -269,6 +275,7 @@ Requires-Dist: langchain-openai>=0.2.0; extra == "langchain"
|
|
|
269
275
|
Requires-Dist: langchain-anthropic>=0.2.0; extra == "langchain"
|
|
270
276
|
Provides-Extra: pocketflow
|
|
271
277
|
Requires-Dist: pocketflow==0.0.3; extra == "pocketflow"
|
|
278
|
+
Provides-Extra: browser
|
|
272
279
|
Provides-Extra: all
|
|
273
280
|
Requires-Dist: decodingtrust-agent-sdk[claude,google,langchain,openai,pocketflow,strands]; extra == "all"
|
|
274
281
|
Provides-Extra: dev
|
|
@@ -311,8 +318,18 @@ Website: <https://decodingtrust-agent.com>
|
|
|
311
318
|
|
|
312
319
|
---
|
|
313
320
|
|
|
321
|
+
## Evaluation Results & Trajectories
|
|
322
|
+
|
|
323
|
+
We have publicly released the full evaluation results, including the complete red-teaming trajectories for every evaluated agent, along with the verifiable judge output for each task that records each agent's pass/fail status:
|
|
324
|
+
|
|
325
|
+
- **Dataset and trajectories:** <https://huggingface.co/datasets/AI-Secure/DTap-Bench-Agent-Trajectories>
|
|
326
|
+
- **Benchmark website:** <https://decodingtrust-agent.com/benchmark>
|
|
327
|
+
|
|
328
|
+
---
|
|
329
|
+
|
|
314
330
|
## Table of Contents
|
|
315
331
|
|
|
332
|
+
- [Evaluation Results & Trajectories](#evaluation-results--trajectories)
|
|
316
333
|
- [Installation](#installation)
|
|
317
334
|
- [Quick Start](#quick-start)
|
|
318
335
|
- [Running the Default Benchmark](#running-the-default-benchmark)
|
|
@@ -330,14 +347,57 @@ Website: <https://decodingtrust-agent.com>
|
|
|
330
347
|
|
|
331
348
|
## Installation
|
|
332
349
|
|
|
350
|
+
### Option A — from PyPI (recommended for users)
|
|
351
|
+
|
|
352
|
+
```bash
|
|
353
|
+
pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
|
|
354
|
+
# …plus the backend(s) you actually use (see "Agent backends" below):
|
|
355
|
+
pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
|
|
356
|
+
pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
|
|
360
|
+
benchmark tasks with `--domain`:
|
|
361
|
+
|
|
362
|
+
```bash
|
|
363
|
+
dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
On first run, the per-task dataset is auto-downloaded from HuggingFace — **only for the
|
|
367
|
+
domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
export HF_TOKEN=hf_...
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Option B — from source (for development)
|
|
374
|
+
|
|
333
375
|
```bash
|
|
334
376
|
git clone https://github.com/AI-secure/DecodingTrust-Agent.git
|
|
335
377
|
cd DecodingTrust-Agent
|
|
336
|
-
pip install -
|
|
337
|
-
|
|
378
|
+
pip install -e ".[openai]" # or [all] for every backend
|
|
379
|
+
# (here `python eval/evaluation.py --task-list benchmark/...` also works)
|
|
338
380
|
```
|
|
339
381
|
|
|
340
|
-
|
|
382
|
+
### Agent backends (optional extras)
|
|
383
|
+
|
|
384
|
+
Install only the framework you evaluate with:
|
|
385
|
+
|
|
386
|
+
| Extra | Backend (`--agent-type`) |
|
|
387
|
+
|---|---|
|
|
388
|
+
| `openai` | `openaisdk` |
|
|
389
|
+
| `claude` | `claudesdk` |
|
|
390
|
+
| `google` | `googleadk` |
|
|
391
|
+
| `langchain` | `langchain` |
|
|
392
|
+
| `strands` | `strands` |
|
|
393
|
+
| `pocketflow` | `pocketflow` |
|
|
394
|
+
| `all` | every backend above |
|
|
395
|
+
|
|
396
|
+
(The `browser` domain needs no extra — its Playwright deps are part of the core install.)
|
|
397
|
+
|
|
398
|
+
### Model keys & Docker
|
|
399
|
+
|
|
400
|
+
Set the API key for your backbone model (only the providers you use):
|
|
341
401
|
|
|
342
402
|
```bash
|
|
343
403
|
export OPENAI_API_KEY=sk-...
|
|
@@ -347,6 +407,10 @@ export GOOGLE_API_KEY=...
|
|
|
347
407
|
|
|
348
408
|
Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
|
|
349
409
|
|
|
410
|
+
> **Browser domain note:** browser tasks send full-page screenshots (large image-token
|
|
411
|
+
> input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
|
|
412
|
+
> provider token-rate limits (HTTP 429), then raise it if your quota allows.
|
|
413
|
+
|
|
350
414
|
---
|
|
351
415
|
|
|
352
416
|
## Quick Start
|
|
@@ -357,7 +421,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
|
|
|
357
421
|
python eval/evaluation.py \
|
|
358
422
|
--task-list benchmark/crm/benign.jsonl \
|
|
359
423
|
--agent-type openaisdk \
|
|
360
|
-
--model gpt-
|
|
424
|
+
--model gpt-5.4 \
|
|
361
425
|
--max-parallel 4
|
|
362
426
|
```
|
|
363
427
|
|
|
@@ -395,7 +459,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
|
|
|
395
459
|
python eval/evaluation.py \
|
|
396
460
|
--task-list benchmark/finance \
|
|
397
461
|
--agent-type openaisdk \
|
|
398
|
-
--model gpt-
|
|
462
|
+
--model gpt-5.4 \
|
|
399
463
|
--max-parallel 8
|
|
400
464
|
```
|
|
401
465
|
|
|
@@ -405,7 +469,7 @@ python eval/evaluation.py \
|
|
|
405
469
|
|
|
406
470
|
```bash
|
|
407
471
|
# Benign utility only
|
|
408
|
-
python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-
|
|
472
|
+
python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
|
|
409
473
|
|
|
410
474
|
# Direct prompt injection only
|
|
411
475
|
python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
|
|
@@ -422,7 +486,7 @@ python eval/evaluation.py \
|
|
|
422
486
|
--task-type malicious \
|
|
423
487
|
--threat-model indirect \
|
|
424
488
|
--risk-category data-exfiltration \
|
|
425
|
-
--agent-type openaisdk --model gpt-
|
|
489
|
+
--agent-type openaisdk --model gpt-5.4
|
|
426
490
|
```
|
|
427
491
|
|
|
428
492
|
### The entire benchmark
|
|
@@ -433,7 +497,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
|
|
|
433
497
|
python eval/evaluation.py \
|
|
434
498
|
--task-list benchmark \
|
|
435
499
|
--agent-type openaisdk \
|
|
436
|
-
--model gpt-
|
|
500
|
+
--model gpt-5.4 \
|
|
437
501
|
--max-parallel 16 \
|
|
438
502
|
--skip-existing
|
|
439
503
|
```
|
|
@@ -468,7 +532,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
|
|
|
468
532
|
Run it like any built-in task list:
|
|
469
533
|
|
|
470
534
|
```bash
|
|
471
|
-
python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-
|
|
535
|
+
python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
|
|
472
536
|
```
|
|
473
537
|
|
|
474
538
|
A few practical patterns:
|
|
@@ -476,11 +540,11 @@ A few practical patterns:
|
|
|
476
540
|
```bash
|
|
477
541
|
# Curate from an existing file
|
|
478
542
|
grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
|
|
479
|
-
python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-
|
|
543
|
+
python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
|
|
480
544
|
|
|
481
545
|
# Try just one task end-to-end
|
|
482
546
|
echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
|
|
483
|
-
python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-
|
|
547
|
+
python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
|
|
484
548
|
```
|
|
485
549
|
|
|
486
550
|
For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
|
|
@@ -498,7 +562,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
|
|
|
498
562
|
| `--risk-category` | `None` | e.g. `data-exfiltration` |
|
|
499
563
|
| `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
|
|
500
564
|
| `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `pocketflow`, `openclaw` |
|
|
501
|
-
| `--model` | `gpt-
|
|
565
|
+
| `--model` | `gpt-5.4` | Backbone model identifier |
|
|
502
566
|
| `--temperature` | `None` | Sampling temperature (model default if unset) |
|
|
503
567
|
| `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
|
|
504
568
|
| `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
|
|
@@ -548,13 +612,13 @@ async def main():
|
|
|
548
612
|
native = OpenAIAgent(
|
|
549
613
|
name="MyAgent",
|
|
550
614
|
instructions="You are a helpful CRM assistant.",
|
|
551
|
-
model="gpt-
|
|
615
|
+
model="gpt-5.4",
|
|
552
616
|
mcp_servers=[my_custom_server],
|
|
553
617
|
)
|
|
554
618
|
|
|
555
619
|
# 2. Load the benchmark task config (adds salesforce, gmail, etc.)
|
|
556
620
|
agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
|
|
557
|
-
runtime_cfg = RuntimeConfig(model="gpt-
|
|
621
|
+
runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
|
|
558
622
|
output_dir="./results")
|
|
559
623
|
|
|
560
624
|
# 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
|
|
@@ -30,8 +30,18 @@ Website: <https://decodingtrust-agent.com>
|
|
|
30
30
|
|
|
31
31
|
---
|
|
32
32
|
|
|
33
|
+
## Evaluation Results & Trajectories
|
|
34
|
+
|
|
35
|
+
We have publicly released the full evaluation results, including the complete red-teaming trajectories for every evaluated agent, along with the verifiable judge output for each task that records each agent's pass/fail status:
|
|
36
|
+
|
|
37
|
+
- **Dataset and trajectories:** <https://huggingface.co/datasets/AI-Secure/DTap-Bench-Agent-Trajectories>
|
|
38
|
+
- **Benchmark website:** <https://decodingtrust-agent.com/benchmark>
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
33
42
|
## Table of Contents
|
|
34
43
|
|
|
44
|
+
- [Evaluation Results & Trajectories](#evaluation-results--trajectories)
|
|
35
45
|
- [Installation](#installation)
|
|
36
46
|
- [Quick Start](#quick-start)
|
|
37
47
|
- [Running the Default Benchmark](#running-the-default-benchmark)
|
|
@@ -49,14 +59,57 @@ Website: <https://decodingtrust-agent.com>
|
|
|
49
59
|
|
|
50
60
|
## Installation
|
|
51
61
|
|
|
62
|
+
### Option A — from PyPI (recommended for users)
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install decodingtrust-agent-sdk # core (includes the browser domain deps)
|
|
66
|
+
# …plus the backend(s) you actually use (see "Agent backends" below):
|
|
67
|
+
pip install "decodingtrust-agent-sdk[openai]" # OpenAI Agents SDK
|
|
68
|
+
pip install "decodingtrust-agent-sdk[google]" # Google ADK / Gemini
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
This installs the `dtap` CLI. Use it instead of `python eval/evaluation.py`, and select
|
|
72
|
+
benchmark tasks with `--domain`:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
dtap eval --domain crm --task-type benign --agent-type openaisdk --model gpt-5.4 --max-parallel 4
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
On first run, the per-task dataset is auto-downloaded from HuggingFace — **only for the
|
|
79
|
+
domain(s) you request**. Set `HF_TOKEN` to avoid unauthenticated rate-limiting (HTTP 429):
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
export HF_TOKEN=hf_...
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Option B — from source (for development)
|
|
86
|
+
|
|
52
87
|
```bash
|
|
53
88
|
git clone https://github.com/AI-secure/DecodingTrust-Agent.git
|
|
54
89
|
cd DecodingTrust-Agent
|
|
55
|
-
pip install -
|
|
56
|
-
|
|
90
|
+
pip install -e ".[openai]" # or [all] for every backend
|
|
91
|
+
# (here `python eval/evaluation.py --task-list benchmark/...` also works)
|
|
57
92
|
```
|
|
58
93
|
|
|
59
|
-
|
|
94
|
+
### Agent backends (optional extras)
|
|
95
|
+
|
|
96
|
+
Install only the framework you evaluate with:
|
|
97
|
+
|
|
98
|
+
| Extra | Backend (`--agent-type`) |
|
|
99
|
+
|---|---|
|
|
100
|
+
| `openai` | `openaisdk` |
|
|
101
|
+
| `claude` | `claudesdk` |
|
|
102
|
+
| `google` | `googleadk` |
|
|
103
|
+
| `langchain` | `langchain` |
|
|
104
|
+
| `strands` | `strands` |
|
|
105
|
+
| `pocketflow` | `pocketflow` |
|
|
106
|
+
| `all` | every backend above |
|
|
107
|
+
|
|
108
|
+
(The `browser` domain needs no extra — its Playwright deps are part of the core install.)
|
|
109
|
+
|
|
110
|
+
### Model keys & Docker
|
|
111
|
+
|
|
112
|
+
Set the API key for your backbone model (only the providers you use):
|
|
60
113
|
|
|
61
114
|
```bash
|
|
62
115
|
export OPENAI_API_KEY=sk-...
|
|
@@ -66,6 +119,10 @@ export GOOGLE_API_KEY=...
|
|
|
66
119
|
|
|
67
120
|
Docker is required: each task spins up isolated MCP servers and Docker-based environments through `TaskExecutor`.
|
|
68
121
|
|
|
122
|
+
> **Browser domain note:** browser tasks send full-page screenshots (large image-token
|
|
123
|
+
> input). With vision models on a metered tier, start at `--max-parallel 2` to avoid
|
|
124
|
+
> provider token-rate limits (HTTP 429), then raise it if your quota allows.
|
|
125
|
+
|
|
69
126
|
---
|
|
70
127
|
|
|
71
128
|
## Quick Start
|
|
@@ -76,7 +133,7 @@ A single benign CRM task with the OpenAI Agents SDK backbone:
|
|
|
76
133
|
python eval/evaluation.py \
|
|
77
134
|
--task-list benchmark/crm/benign.jsonl \
|
|
78
135
|
--agent-type openaisdk \
|
|
79
|
-
--model gpt-
|
|
136
|
+
--model gpt-5.4 \
|
|
80
137
|
--max-parallel 4
|
|
81
138
|
```
|
|
82
139
|
|
|
@@ -114,7 +171,7 @@ Run every benign + direct + indirect task in a domain by pointing `--task-list`
|
|
|
114
171
|
python eval/evaluation.py \
|
|
115
172
|
--task-list benchmark/finance \
|
|
116
173
|
--agent-type openaisdk \
|
|
117
|
-
--model gpt-
|
|
174
|
+
--model gpt-5.4 \
|
|
118
175
|
--max-parallel 8
|
|
119
176
|
```
|
|
120
177
|
|
|
@@ -124,7 +181,7 @@ python eval/evaluation.py \
|
|
|
124
181
|
|
|
125
182
|
```bash
|
|
126
183
|
# Benign utility only
|
|
127
|
-
python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-
|
|
184
|
+
python eval/evaluation.py --task-list benchmark/crm/benign.jsonl --agent-type openaisdk --model gpt-5.4
|
|
128
185
|
|
|
129
186
|
# Direct prompt injection only
|
|
130
187
|
python eval/evaluation.py --task-list benchmark/crm/direct.jsonl --agent-type claudesdk --model claude-sonnet-4-20250514
|
|
@@ -141,7 +198,7 @@ python eval/evaluation.py \
|
|
|
141
198
|
--task-type malicious \
|
|
142
199
|
--threat-model indirect \
|
|
143
200
|
--risk-category data-exfiltration \
|
|
144
|
-
--agent-type openaisdk --model gpt-
|
|
201
|
+
--agent-type openaisdk --model gpt-5.4
|
|
145
202
|
```
|
|
146
203
|
|
|
147
204
|
### The entire benchmark
|
|
@@ -152,7 +209,7 @@ Point `--task-list` at the top-level [`benchmark/`](benchmark/) directory to run
|
|
|
152
209
|
python eval/evaluation.py \
|
|
153
210
|
--task-list benchmark \
|
|
154
211
|
--agent-type openaisdk \
|
|
155
|
-
--model gpt-
|
|
212
|
+
--model gpt-5.4 \
|
|
156
213
|
--max-parallel 16 \
|
|
157
214
|
--skip-existing
|
|
158
215
|
```
|
|
@@ -187,7 +244,7 @@ Any JSONL file with the schema below is a valid `--task-list`. Pick a subset of
|
|
|
187
244
|
Run it like any built-in task list:
|
|
188
245
|
|
|
189
246
|
```bash
|
|
190
|
-
python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-
|
|
247
|
+
python eval/evaluation.py --task-list my_subset.jsonl --agent-type openaisdk --model gpt-5.4 --max-parallel 4
|
|
191
248
|
```
|
|
192
249
|
|
|
193
250
|
A few practical patterns:
|
|
@@ -195,11 +252,11 @@ A few practical patterns:
|
|
|
195
252
|
```bash
|
|
196
253
|
# Curate from an existing file
|
|
197
254
|
grep '"risk_category": "data-exfiltration"' benchmark/crm/indirect.jsonl > my_crm_exfil.jsonl
|
|
198
|
-
python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-
|
|
255
|
+
python eval/evaluation.py --task-list my_crm_exfil.jsonl --agent-type openaisdk --model gpt-5.4
|
|
199
256
|
|
|
200
257
|
# Try just one task end-to-end
|
|
201
258
|
echo '{"domain": "crm", "type": "benign", "task_id": "1"}' > one_task.jsonl
|
|
202
|
-
python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-
|
|
259
|
+
python eval/evaluation.py --task-list one_task.jsonl --agent-type openaisdk --model gpt-5.4
|
|
203
260
|
```
|
|
204
261
|
|
|
205
262
|
For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quickstart.md](docs/quickstart.md).
|
|
@@ -217,7 +274,7 @@ For per-task internals (`config.yaml`, `judge.py`, `setup.sh`), see [docs/quicks
|
|
|
217
274
|
| `--risk-category` | `None` | e.g. `data-exfiltration` |
|
|
218
275
|
| `--max-parallel` | `5` | Concurrent tasks (environments are reused across tasks) |
|
|
219
276
|
| `--agent-type` | `openaisdk` | `openaisdk`, `claudesdk`, `googleadk`, `langchain`, `pocketflow`, `openclaw` |
|
|
220
|
-
| `--model` | `gpt-
|
|
277
|
+
| `--model` | `gpt-5.4` | Backbone model identifier |
|
|
221
278
|
| `--temperature` | `None` | Sampling temperature (model default if unset) |
|
|
222
279
|
| `--port-range` | `None` | Dynamic MCP port range, e.g. `"10000-12000"` |
|
|
223
280
|
| `--direct-prompt` | off | For direct threat model, use the malicious goal as-is instead of replaying attack turns |
|
|
@@ -267,13 +324,13 @@ async def main():
|
|
|
267
324
|
native = OpenAIAgent(
|
|
268
325
|
name="MyAgent",
|
|
269
326
|
instructions="You are a helpful CRM assistant.",
|
|
270
|
-
model="gpt-
|
|
327
|
+
model="gpt-5.4",
|
|
271
328
|
mcp_servers=[my_custom_server],
|
|
272
329
|
)
|
|
273
330
|
|
|
274
331
|
# 2. Load the benchmark task config (adds salesforce, gmail, etc.)
|
|
275
332
|
agent_cfg = AgentConfig.from_yaml("dataset/crm/benign/1/config.yaml")
|
|
276
|
-
runtime_cfg = RuntimeConfig(model="gpt-
|
|
333
|
+
runtime_cfg = RuntimeConfig(model="gpt-5.4", temperature=0.1, max_turns=200,
|
|
277
334
|
output_dir="./results")
|
|
278
335
|
|
|
279
336
|
# 3. Wrap — auto-detects OpenAI SDK / LangChain / Claude SDK / Google ADK
|
|
@@ -8,7 +8,7 @@ injection.
|
|
|
8
8
|
|
|
9
9
|
Usage:
|
|
10
10
|
python agent/hermes/example.py --config path/to/config.yaml
|
|
11
|
-
python agent/hermes/example.py --config path/to/config.yaml --model openai/gpt-
|
|
11
|
+
python agent/hermes/example.py --config path/to/config.yaml --model openai/gpt-5.4
|
|
12
12
|
python agent/hermes/example.py --config path/to/config.yaml --debug
|
|
13
13
|
|
|
14
14
|
Prerequisites:
|
{decodingtrust_agent_sdk-0.2.8 → decodingtrust_agent_sdk-0.2.10}/agent/hermes/src/hermes_runner.py
RENAMED
|
@@ -25,7 +25,7 @@ Request JSON schema::
|
|
|
25
25
|
"base_url": "https://api.openai.com/v1", # may be ""
|
|
26
26
|
"api_key": "sk-...", # may be null
|
|
27
27
|
"provider": "openai", # may be null
|
|
28
|
-
"model": "gpt-
|
|
28
|
+
"model": "gpt-5.4",
|
|
29
29
|
"max_turns": 30,
|
|
30
30
|
"system_prompt": "You are ...", # may be null
|
|
31
31
|
"enabled_toolsets": ["mcp-salesforce"], # MCP-only restriction
|
|
@@ -184,7 +184,7 @@ async def main() -> None:
|
|
|
184
184
|
agent = build_agent(
|
|
185
185
|
native_agent=native,
|
|
186
186
|
agent_cfg=AgentConfig(system_prompt=""),
|
|
187
|
-
runtime_cfg=RuntimeConfig(model="gpt-
|
|
187
|
+
runtime_cfg=RuntimeConfig(model="gpt-5.4", max_turns=10),
|
|
188
188
|
)
|
|
189
189
|
async with agent:
|
|
190
190
|
result = await agent.run("Say hello.", metadata={{"task_id": "smoke"}})
|
|
@@ -207,7 +207,7 @@ from .agent import {class_name}
|
|
|
207
207
|
async def main() -> None:
|
|
208
208
|
agent = {class_name}(
|
|
209
209
|
agent_config=AgentConfig(system_prompt=""),
|
|
210
|
-
runtime_config=RuntimeConfig(model="gpt-
|
|
210
|
+
runtime_config=RuntimeConfig(model="gpt-5.4", max_turns=10),
|
|
211
211
|
)
|
|
212
212
|
async with agent:
|
|
213
213
|
result = await agent.run("Say hello.", metadata={{"task_id": "smoke"}})
|