decodingtrust-agent-sdk 0.1.1__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {decodingtrust_agent_sdk-0.1.1/decodingtrust_agent_sdk.egg-info → decodingtrust_agent_sdk-0.2.1}/PKG-INFO +1 -1
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/cli/main.py +1 -1
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1/decodingtrust_agent_sdk.egg-info}/PKG-INFO +1 -1
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/SOURCES.txt +1 -0
- decodingtrust_agent_sdk-0.2.1/eval/_ui.py +339 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/eval/evaluation.py +121 -24
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/eval/task_runner.py +16 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/pyproject.toml +1 -1
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/LICENSE +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/MANIFEST.in +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/README.md +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/example.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/mcp_proxy.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/claudesdk/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/example.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/mcp_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/googleadk/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/example.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/langchain/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/example.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/agent_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/mcp_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openaisdk/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/example.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/helpers/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/helpers/auth_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/mcp_proxy.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/plugin_generator.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/openclaw/src/utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/example.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/prompts/react_agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/agent_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/async_helper.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/mcp_react_agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/native_agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/agent/pocketflow/src/nodes.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/browser/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/browser/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/browser/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/code/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/code/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/code/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/crm/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/crm/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/crm/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/customer-service/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/customer-service/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/customer-service/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/finance/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/finance/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/finance/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/legal/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/legal/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/legal/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/macos/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/macos/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/macos/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/medical/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/medical/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/medical/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/os-filesystem/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/os-filesystem/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/os-filesystem/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/research/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/research/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/research/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/telecom/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/telecom/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/telecom/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/travel/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/travel/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/travel/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/windows/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/windows/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/windows/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/workflow/benign.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/workflow/direct.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/benchmark/workflow/indirect.jsonl +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/cli/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/cli/scaffold.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/dependency_links.txt +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/entry_points.txt +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/requires.txt +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/decodingtrust_agent_sdk.egg-info/top_level.txt +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/config/env.yaml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/config/injection_mcp.yaml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/config/mcp.yaml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/arxiv/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/arxiv/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker/docker-compose.dev.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/atlassian/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/bigquery/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/booking/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/calendar/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/calendar/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/custom-website/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/customer_service/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/databricks/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/databricks/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/ecommerce/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/ers/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/ers/hrms/docker/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/finance/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/github/docker/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/github/docker/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/gmail/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/gmail/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/google-form/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/google-form/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/googledocs/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/googledocs/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/hospital/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/hospital/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/legal/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/linkedin/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/macos/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/os-filesystem/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/os-filesystem/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/paypal/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/paypal/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/research/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/research/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/salesforce_crm/docker-compose.yaml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/slack/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/slack/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/snowflake/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/snowflake/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telecom/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telecom/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telegram/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/telegram/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/terminal/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/terminal/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/travel/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/travel/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/whatsapp/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/whatsapp/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/windows/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/zoom/docker-compose-hub.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/envs/zoom/docker-compose.yml +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/atlassian/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/calendar/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/custom_website/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/customer_service/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/databricks/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/ecommerce/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/finance/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/github/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/gmail/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/google_form/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/googledocs/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/hospital/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/legal/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/macos/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/os-filesystem/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/paypal/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/research/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/salesforce/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/slack/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/snowflake/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/telecom/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/telegram/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/terminal/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/travel/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/whatsapp/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/windows/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/injection_mcp_server/zoom/env_injection.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/atlassian/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/atlassian/test_server.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/bigquery/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/booking/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/browser/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/calendar/example_multi_user.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/calendar/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/calendar/test_mcp.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/customer_service/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/databricks/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/databricks/probe.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/ers/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/core/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/core/data_loader.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/core/portfolio.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/evaluation/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/evaluation/evaluator.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/evaluation/logger.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/config.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/content.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/html.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/locations.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/methods.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/injection/presets.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/run_mcp.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/run_web.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/extractor.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/extractor_minimal.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/extractor_simple.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/injection_mcp.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/mcp.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/account.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/browsing.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/social.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/trading.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/tools/utility.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/server/web.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/finance/test_mcp_tools.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/github/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/gmail/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/google_form/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/googledocs/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/hospital/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/courtlistener_store.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/data_loader.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/document_store.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/core/matter_manager.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/scripts/collect_data.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/server/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/legal/server/mcp.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/macos/client_test.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/macos/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/os-filesystem/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/paypal/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/research/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/salesforce/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/slack/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/snowflake/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/snowflake/probe.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/telecom/mcp_client.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/telecom/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/telegram/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/terminal/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/travel/client_test.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/travel/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/whatsapp/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/windows/client_test.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/windows/mcp_server.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/mcp_server/zoom/main.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/hooks/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/hooks/audit_log.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/hooks/hooks.json +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/run_benign.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/agent.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/attacks.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/environment.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/hooks.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/judge.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/red_teaming_trajectory.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/task.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/src/types/trajectory.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/atlassian/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/atlassian/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/bigquery/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/bigquery/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/calendar/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/calendar/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/cs_env_client.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/abusive_comms.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/bulk_destructive.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/cross_customer.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/email_exfiltration.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/financial_fraud.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/llm_utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/pii_disclosure.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/policy_misrep.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/customer_service/judges/text_utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/databricks/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/databricks/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/finance/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/finance/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/github/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/github/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/gmail/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/gmail/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/google_form/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/google_form/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/legal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/legal/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/macos/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/macos/env_setup.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/macos/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/os_filesystem/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/os_filesystem/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/paypal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/paypal/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/port_allocator.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/research/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/research/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/salesforce/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/salesforce/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/slack/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/slack/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/snowflake/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/snowflake/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telecom/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telecom/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telegram/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/telegram/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/terminal/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/terminal/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/env_client.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/llm_judge.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/travel/prompts.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/utils/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/whatsapp/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/whatsapp/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/env_reset.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/env_setup.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/exfil_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/windows/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/zoom/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/dt_arena/utils/zoom/helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/eval/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/setup.cfg +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/__init__.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/agent_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/agent_wrapper.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/compose_utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/config.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/env_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/eval_stats.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/injection_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/injection_mcp_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/judge_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/judge_utils.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/llm.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/logging.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/mcp_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/mcp_manager.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/memory_guard.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/red_teaming_sandbox.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/reset_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/resource_manager.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/skill_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/task_executor.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/task_helpers.py +0 -0
- {decodingtrust_agent_sdk-0.1.1 → decodingtrust_agent_sdk-0.2.1}/utils/template_helpers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: decodingtrust-agent-sdk
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
|
|
5
5
|
Author-email: DTap Team <zhaorun@uchicago.edu>
|
|
6
6
|
License: Apache License
|
|
@@ -12,7 +12,7 @@ from .scaffold import SUPPORTED_FRAMEWORKS, scaffold
|
|
|
12
12
|
|
|
13
13
|
app = typer.Typer(
|
|
14
14
|
name="dtap",
|
|
15
|
-
help="DecodingTrust
|
|
15
|
+
help="DecodingTrust-Agent Platform (DTap): A Controllable and Interactive Red-Teaming Platform for AI Agents",
|
|
16
16
|
add_completion=False,
|
|
17
17
|
no_args_is_help=True,
|
|
18
18
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: decodingtrust-agent-sdk
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents
|
|
5
5
|
Author-email: DTap Team <zhaorun@uchicago.edu>
|
|
6
6
|
License: Apache License
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Rich live-progress UI for `dtap eval`.
|
|
2
|
+
|
|
3
|
+
Off when --verbose is set or stdout is not a TTY. The UI consumes structured
|
|
4
|
+
`[DTAP_STATUS]` lines emitted by task_runner.py via stdout-pipe capture.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import shutil
|
|
9
|
+
import subprocess
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
from collections import deque
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Dict, Iterable, List, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
import psutil
|
|
18
|
+
from rich.console import Console, Group
|
|
19
|
+
from rich.layout import Layout
|
|
20
|
+
from rich.live import Live
|
|
21
|
+
from rich.panel import Panel
|
|
22
|
+
from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
|
|
23
|
+
from rich.table import Table
|
|
24
|
+
from rich.text import Text
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
STATUS_PREFIX = "[DTAP_STATUS]"
|
|
28
|
+
_MAX_FAILURES_SHOWN = 5
|
|
29
|
+
_MAX_RUNNING_SHOWN = 12
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ─── Live state ──────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
@dataclass
class TaskState:
    """Mutable per-task record shown in the "Running" section of the live UI."""

    # Identifier and grouping used to label the task's row.
    task_id: str
    domain: str
    # Names of the environments the task uses, frozen into a tuple.
    environments: Tuple[str, ...]
    # Timestamp taken when the task was registered; elapsed time is measured
    # against it.
    start_time: float
    # Lifecycle phase: queued | loading | running | judging | done
    phase: str = "queued"
    # Current turn and expected number of turns; both stay 0 until reported.
    turn: int = 0
    total_turns: int = 0
    # File receiving the task's captured output, when one exists.
    log_path: Optional[Path] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class EvalUIState:
    """Aggregate state rendered by the live evaluation UI.

    Holds the static run description (agent, model, parallelism, filters),
    the set of currently running tasks keyed by task id, and the running
    totals of finished tasks. The three mutation helpers take ``_lock``
    before touching shared fields.
    """

    total_tasks: int
    agent_type: str
    model: str
    max_parallel: int
    filters: Dict[str, Optional[str]]
    start_time: float = field(default_factory=time.monotonic)

    running: Dict[str, "TaskState"] = field(default_factory=dict)
    succeeded: int = 0
    failed: int = 0
    # Bounded history of (task_id, log_path) pairs for failed tasks.
    recent_failures: deque = field(default_factory=lambda: deque(maxlen=_MAX_FAILURES_SHOWN))
    # Wall-clock duration, in seconds, of each finished task that was tracked.
    durations: List[float] = field(default_factory=list)

    _lock: threading.Lock = field(default_factory=threading.Lock)

    # mutation helpers — thread-safe (status lines arrive on background reader tasks)

    @staticmethod
    def _as_int(raw: Optional[str]) -> Optional[int]:
        """Convert a raw status value to int; return None if absent or malformed."""
        if raw is None:
            return None
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    def start_task(self, task_id: str, domain: str, environments: Iterable[str],
                   log_path: Optional[Path]) -> None:
        """Register ``task_id`` as running, replacing any entry with the same id."""
        with self._lock:
            self.running[task_id] = TaskState(
                task_id=task_id,
                domain=domain,
                environments=tuple(environments),
                start_time=time.monotonic(),
                log_path=log_path,
            )

    def update_status(self, task_id: str, fields: Dict[str, str]) -> None:
        """Apply parsed status fields to a running task.

        Recognised keys are ``phase``, ``turn``, ``total_turns`` and ``turns``
        (the latter only fills ``total_turns`` while it is still unset).
        Unknown task ids are ignored. Numeric fields whose value is not a
        valid integer are skipped instead of raising, so a malformed status
        line cannot break the caller that is draining subprocess output.
        """
        with self._lock:
            st = self.running.get(task_id)
            if st is None:
                return
            if "phase" in fields:
                st.phase = fields["phase"]
            turn = self._as_int(fields.get("turn"))
            if turn is not None:
                st.turn = turn
            total_turns = self._as_int(fields.get("total_turns"))
            if total_turns is not None:
                st.total_turns = total_turns
            if not st.total_turns:
                fallback = self._as_int(fields.get("turns"))
                if fallback is not None:
                    st.total_turns = fallback

    def finish_task(self, task_id: str, success: bool) -> None:
        """Remove ``task_id`` from the running set and update the totals.

        The success/failure counters always advance. A duration is recorded
        only when the task was actually tracked: an untracked id has no start
        time, and recording 0.0 for it would drag the displayed average down.
        """
        with self._lock:
            st = self.running.pop(task_id, None)
            if st is not None:
                self.durations.append(time.monotonic() - st.start_time)
            if success:
                self.succeeded += 1
            else:
                self.failed += 1
                if st is not None:
                    self.recent_failures.append((task_id, st.log_path))

    @property
    def completed(self) -> int:
        """Number of finished tasks, successful or not."""
        return self.succeeded + self.failed
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ─── Status-line parsing ─────────────────────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
def parse_status_line(line: str) -> Optional[Dict[str, str]]:
    """Turn a '[DTAP_STATUS] k=v k=v' line into a ``{k: v}`` mapping.

    Returns None when ``line`` does not begin with the status prefix.
    Whitespace-separated tokens without an '=' are dropped; a token is split
    on its first '=' only, and a repeated key keeps its last value.
    """
    if not line.startswith(STATUS_PREFIX):
        return None
    payload = line[len(STATUS_PREFIX):].strip()
    pairs = (token.split("=", 1) for token in payload.split() if "=" in token)
    return {key: value for key, value in pairs}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ─── Rendering ───────────────────────────────────────────────────────────────
|
|
123
|
+
|
|
124
|
+
def _format_duration(secs: float) -> str:
    """Render seconds as ``MM:SS``, or ``H:MM:SS`` once an hour is reached.

    The value is truncated to whole seconds and negative input is clamped
    to zero.
    """
    whole = max(0, int(secs))
    minutes, seconds = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)
    clock = f"{minutes:02d}:{seconds:02d}"
    return f"{hours:d}:{clock}" if hours else clock
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _docker_container_count() -> Optional[int]:
    """Count the containers listed by ``docker ps --quiet``.

    Returns None when no ``docker`` executable is found on PATH, when the
    command exits with a non-zero status, or when running it raises (which
    includes exceeding the 2-second timeout). This is a best-effort probe,
    so failures are reported as "unknown" rather than propagated.
    """
    if shutil.which("docker") is None:
        return None
    try:
        result = subprocess.run(
            ["docker", "ps", "--quiet"],
            capture_output=True, text=True, timeout=2,
        )
        if result.returncode != 0:
            return None
        # One non-blank output line per listed container.
        listed = [row for row in result.stdout.splitlines() if row.strip()]
        return len(listed)
    except Exception:
        return None
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _phase_chip(phase: str, turn: int, total_turns: int) -> Text:
    """Build the bracketed, colour-coded label for a task's phase.

    While a task is "running" and its total turn count is known, the label
    shows "turn X/Y" (X falls back to 1 when ``turn`` is 0); otherwise the
    phase name itself is shown. Unrecognised phases are drawn in white.
    """
    palette = {
        "queued": "dim",
        "loading": "yellow",
        "running": "cyan",
        "judging": "magenta",
        "done": "green",
    }
    style = palette.get(phase, "white")
    if phase == "running" and total_turns:
        label = f"turn {turn or 1}/{total_turns}"
    else:
        label = phase
    return Text(f"[{label}]", style=style)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _header_panel(state: EvalUIState) -> Panel:
    """Build the title panel: agent type, model, parallelism and active filters.

    Filters whose value is empty or None are left out; when none remain the
    filter suffix is omitted entirely.
    """
    active_filters = [f"{k}={v}" for k, v in state.filters.items() if v]
    filter_str = " · ".join(active_filters)
    headline = Text.assemble(
        (state.agent_type, "bold cyan"),
        " · ",
        (state.model, "bold"),
        f" · max_parallel={state.max_parallel}",
    )
    if filter_str:
        headline.append(" · ")
        headline.append(filter_str, style="dim")
    return Panel(headline, title="DTap evaluation", title_align="left", border_style="cyan")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _progress_panel(state: EvalUIState, progress: Progress, task_id: int) -> Panel:
    """Build the progress panel: overall bar plus success/failure/timing summary.

    Args:
        state: Shared UI state to read the counters and durations from.
        progress: Rich progress object holding the overall bar.
        task_id: Id of the overall task inside ``progress``; its completed
            count is updated as a side effect of rendering.

    Returns:
        A panel grouping the progress bar with a one-line summary.
    """
    # Snapshot under the lock so the counters and the duration list belong to
    # the same moment; otherwise a task finishing mid-render could make
    # sum()/len() disagree or the bar lag behind the printed totals.
    with state._lock:
        succeeded = state.succeeded
        failed = state.failed
        durations = list(state.durations)

    progress.update(task_id, completed=succeeded + failed)
    elapsed = time.monotonic() - state.start_time
    avg = (sum(durations) / len(durations)) if durations else 0.0
    summary = Text.assemble(
        ("Succeeded: ", "bold"),
        (str(succeeded), "green"),
        " Failed: ",
        (str(failed), "red" if failed else "dim"),
        " Avg: ",
        (f"{avg:0.1f}s", "cyan"),
        " Elapsed: ",
        (_format_duration(elapsed), "cyan"),
    )
    return Panel(Group(progress, summary), title="Progress", title_align="left", border_style="white")
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _running_panel(state: EvalUIState) -> Panel:
    """Build the panel listing currently running tasks, oldest first.

    At most ``_MAX_RUNNING_SHOWN`` rows are drawn; any remainder is
    summarised in a trailing "... +N more" row, and an "(idle)" row is shown
    when nothing is running. The panel title carries the running count and
    the configured parallelism.
    """
    # Copy the task list under the lock; the rest works on the snapshot.
    with state._lock:
        items = list(state.running.values())
    items.sort(key=lambda t: t.start_time)

    table = Table.grid(padding=(0, 1))
    table.add_column(width=3, no_wrap=True)
    table.add_column(no_wrap=True)
    table.add_column(no_wrap=True)
    table.add_column(no_wrap=True, justify="right")

    now = time.monotonic()
    # The per-row environment string the original computed here was never
    # displayed, so it is no longer built.
    for st in items[:_MAX_RUNNING_SHOWN]:
        table.add_row(
            "▸",
            Text(f"{st.domain}/{st.task_id}", style="bold"),
            _phase_chip(st.phase, st.turn, st.total_turns),
            Text(_format_duration(now - st.start_time), style="dim"),
        )
    if len(items) > _MAX_RUNNING_SHOWN:
        table.add_row("", Text(f"... +{len(items) - _MAX_RUNNING_SHOWN} more", style="dim"), "", "")
    if not items:
        table.add_row("", Text("(idle)", style="dim"), "", "")
    return Panel(
        table,
        title=f"Running ({len(items)}/{state.max_parallel})",
        title_align="left",
        border_style="cyan",
    )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# Minimum number of seconds between two `docker ps` probes made for the panel.
_DOCKER_COUNT_TTL_SECS = 2.0
# [monotonic timestamp of the last probe (None before the first), last result]
_docker_count_cache: list = [None, None]


def _cached_docker_container_count() -> Optional[int]:
    """Return the Docker container count, probing at most once per TTL window."""
    last_probe = _docker_count_cache[0]
    if last_probe is None or time.monotonic() - last_probe >= _DOCKER_COUNT_TTL_SECS:
        _docker_count_cache[1] = _docker_container_count()
        # Stamp after the probe so a slow `docker ps` does not shorten the window.
        _docker_count_cache[0] = time.monotonic()
    return _docker_count_cache[1]


def _resources_panel(state: EvalUIState, port_provider=None) -> Panel:
    """Build the resources panel: Docker containers, CPU, memory and ports.

    Args:
        state: Shared UI state. Not read here; kept so every panel builder
            takes the same first argument.
        port_provider: Optional zero-argument callable returning the ports
            currently in use. When omitted, no ports are listed.

    Returns:
        A panel with one line of host metrics and one line of ports. At most
        12 ports are listed, followed by a "(+N)" remainder marker.
    """
    cpu = psutil.cpu_percent(interval=None)
    vm = psutil.virtual_memory()
    mem_used_gb = (vm.total - vm.available) / (1024 ** 3)
    mem_total_gb = vm.total / (1024 ** 3)

    # The panel is rebuilt on every UI refresh (several times per second), so
    # the container count comes from a short-lived cache instead of spawning
    # a `docker ps` subprocess for each frame.
    containers = _cached_docker_container_count()
    docker_str = f"{containers}" if containers is not None else "n/a"

    ports = port_provider() if port_provider else []
    if len(ports) > 12:
        port_str = ", ".join(str(p) for p in sorted(ports)[:12]) + f", … (+{len(ports) - 12})"
    else:
        port_str = ", ".join(str(p) for p in sorted(ports)) if ports else "—"

    line = Text.assemble(
        ("Docker: ", "bold"), (f"{docker_str} containers ", "white"),
        ("CPU: ", "bold"), (f"{cpu:0.0f}% ", "white"),
        ("Mem: ", "bold"), (f"{mem_used_gb:0.1f}/{mem_total_gb:0.0f} GB", "white"),
    )
    ports_line = Text.assemble(("Ports in use: ", "bold"), (port_str, "dim"))
    return Panel(Group(line, ports_line), title="Resources", title_align="left", border_style="magenta")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _failures_panel(state: EvalUIState) -> Optional[Panel]:
    """Build the "Recent failures" panel, or return None if nothing has failed.

    Each row pairs a failed task id with the path of its log, or "(no log)"
    when no path was recorded.
    """
    # Copy under the lock so rendering works on a stable snapshot.
    with state._lock:
        snapshot = list(state.recent_failures)
    if not snapshot:
        return None

    grid = Table.grid(padding=(0, 1))
    grid.add_column(no_wrap=True)
    grid.add_column(no_wrap=False)
    for failed_id, log in snapshot:
        location = str(log) if log else "(no log)"
        grid.add_row(Text(failed_id, style="red"), Text(location, style="dim"))
    return Panel(grid, title="Recent failures", title_align="left", border_style="red")
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# ─── LiveProgress controller ─────────────────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
class LiveProgress:
    """Context manager that drives the Rich live display for an evaluation run.

    The eval loop mutates ``self.state``; while the context is active a
    background thread re-renders the layout on a fixed interval so elapsed
    time and resource figures keep moving even when no task events arrive.
    """

    def __init__(
        self,
        *,
        total_tasks: int,
        agent_type: str,
        model: str,
        max_parallel: int,
        filters: Dict[str, Optional[str]],
        port_provider=None,
        console: Optional[Console] = None,
    ) -> None:
        """Create the UI state and the overall progress bar.

        Args:
            total_tasks: Number of tasks the overall bar counts up to.
            agent_type: Agent label shown in the header.
            model: Model label shown in the header.
            max_parallel: Parallelism shown in the header and running title.
            filters: Filter names and values; empty values are not displayed.
            port_provider: Optional callable returning the ports in use.
            console: Console to draw on; a new one is created when omitted.
        """
        self.state = EvalUIState(
            total_tasks=total_tasks,
            agent_type=agent_type,
            model=model,
            max_parallel=max_parallel,
            filters=filters,
        )
        self._console = console or Console()
        self._port_provider = port_provider
        columns = (
            TextColumn("[bold blue]{task.percentage:>5.1f}%"),
            BarColumn(bar_width=None),
            TextColumn("{task.completed}/{task.total}"),
            TextColumn("ETA"),
            TimeRemainingColumn(),
        )
        self._progress = Progress(*columns, expand=True)
        self._task_id = self._progress.add_task("overall", total=total_tasks)
        self._live: Optional[Live] = None

    def __enter__(self) -> "LiveProgress":
        """Start the live display and the periodic refresh thread."""
        live = Live(
            self._render(),
            console=self._console,
            refresh_per_second=4,
            transient=False,
        )
        self._live = live
        live.__enter__()
        # A daemon thread keeps elapsed time and resources ticking between
        # task events.
        self._stop = threading.Event()
        self._refresher = threading.Thread(target=self._tick, daemon=True)
        self._refresher.start()
        return self

    def __exit__(self, *exc) -> None:
        """Stop refreshing, draw one final frame, and close the live display."""
        self._stop.set()
        self._refresher.join(timeout=2)
        live = self._live
        if live:
            live.update(self._render())
            live.__exit__(*exc)

    def _tick(self) -> None:
        """Refresh loop body: re-render every 0.25 s until asked to stop."""
        stop = self._stop
        while not stop.is_set():
            if self._live is not None:
                self._live.update(self._render())
            stop.wait(0.25)

    def _render(self) -> Layout:
        """Assemble the full layout from the individual panels.

        The order is header, progress, running, resources, and then the
        failures panel, which is present only once a failure was recorded.
        """
        state = self.state
        sections = [
            Layout(_header_panel(state), size=3, name="header"),
            Layout(_progress_panel(state, self._progress, self._task_id), size=4, name="progress"),
            Layout(_running_panel(state), name="running"),
            Layout(_resources_panel(state, self._port_provider), size=4, name="resources"),
        ]
        failures = _failures_panel(state)
        if failures is not None:
            # One row per failure plus the two border rows, capped at 8.
            height = min(len(state.recent_failures) + 2, 8)
            sections.append(Layout(failures, size=height, name="failures"))
        layout = Layout()
        layout.split_column(*sections)
        return layout
|
|
@@ -4,7 +4,7 @@ import os
|
|
|
4
4
|
import sys
|
|
5
5
|
import time
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Dict, List, Tuple
|
|
7
|
+
from typing import Dict, List, Optional, Tuple
|
|
8
8
|
|
|
9
9
|
from utils import (
|
|
10
10
|
PROJECT_ROOT,
|
|
@@ -18,6 +18,8 @@ from utils.memory_guard import check_memory_before_launch
|
|
|
18
18
|
from utils.task_executor import TaskExecutor, ScheduledTask, EnvInstance, get_task_environments
|
|
19
19
|
from utils.eval_stats import TaskTiming, EvaluationStats
|
|
20
20
|
|
|
21
|
+
from eval._ui import LiveProgress, parse_status_line
|
|
22
|
+
|
|
21
23
|
|
|
22
24
|
def _build_scheduled_tasks(task_specs: List[TaskSpec]) -> List[ScheduledTask]:
|
|
23
25
|
"""Convert TaskSpec list to ScheduledTask list with environment requirements."""
|
|
@@ -37,49 +39,65 @@ def _build_scheduled_tasks(task_specs: List[TaskSpec]) -> List[ScheduledTask]:
|
|
|
37
39
|
return scheduled
|
|
38
40
|
|
|
39
41
|
|
|
42
|
+
def _short_task_id(task: ScheduledTask) -> str:
    """Return a compact task label for the UI.

    The label is '<risk_category>/<task_id>' when both attributes are set;
    otherwise it is the name of the task's directory.
    """
    if not (task.risk_category and task.task_id):
        return task.task_dir.name
    return f"{task.risk_category}/{task.task_id}"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _task_log_path(args: argparse.Namespace, task: ScheduledTask) -> Path:
    """Return the path of the ``task.log`` file for this task's captured output.

    The directory is
    ``<root>/<result type>/<agent type>/<model>/<dataset path>``, where root
    is ``$EVAL_RESULTS_ROOT`` or ``./results``, the result type is
    "direct_prompt" or "benchmark" depending on ``args.direct_prompt``, and
    '/' and ':' in the model name are replaced by '_'. The directory is
    created if it does not exist.
    """
    results_root = Path(os.getenv("EVAL_RESULTS_ROOT", str(Path.cwd() / "results")))
    dataset_path = extract_dataset_path(task.task_dir)
    # Map both path-hostile characters to '_' in a single pass.
    model_dir = args.model.translate(str.maketrans("/:", "__"))
    if args.direct_prompt:
        result_type = "direct_prompt"
    else:
        result_type = "benchmark"
    log_dir = results_root / result_type / args.agent_type / model_dir / dataset_path
    log_dir.mkdir(parents=True, exist_ok=True)
    return log_dir / "task.log"
|
|
58
|
+
|
|
59
|
+
|
|
40
60
|
async def _run_single_task_subprocess(
|
|
41
61
|
task: ScheduledTask,
|
|
42
62
|
instances: Dict[str, EnvInstance],
|
|
43
63
|
args: argparse.Namespace,
|
|
44
64
|
base_env: Dict[str, str],
|
|
65
|
+
ui: Optional[LiveProgress] = None,
|
|
45
66
|
) -> int:
|
|
46
|
-
"""
|
|
47
|
-
|
|
67
|
+
"""Invoke task_runner.py as a subprocess for one task.
|
|
68
|
+
|
|
69
|
+
One code path regardless of UI mode:
|
|
70
|
+
- subprocess stdout/stderr are always piped, written to <out>/task.log,
|
|
71
|
+
and scanned for [DTAP_STATUS] lines feeding the UI state.
|
|
72
|
+
- --verbose additionally echoes each captured line to the terminal.
|
|
48
73
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
can skip Docker startup and use the existing instances.
|
|
74
|
+
Docker environments are managed by TaskExecutor; we pass port mappings
|
|
75
|
+
via env vars so task_runner skips its own Docker startup.
|
|
52
76
|
"""
|
|
53
77
|
env = base_env.copy()
|
|
78
|
+
env["PYTHONUNBUFFERED"] = "1" # keep per-line latency low
|
|
54
79
|
|
|
55
|
-
#
|
|
80
|
+
# PYTHONPATH must include the project root for the subprocess
|
|
56
81
|
pythonpath = env.get("PYTHONPATH", "")
|
|
57
82
|
if str(PROJECT_ROOT) not in pythonpath:
|
|
58
83
|
env["PYTHONPATH"] = f"{PROJECT_ROOT}:{pythonpath}" if pythonpath else str(PROJECT_ROOT)
|
|
59
84
|
|
|
60
85
|
# Pass port mappings and project names from instances
|
|
61
|
-
print(f"[DEBUG] Instances: {list(instances.keys())}", flush=True)
|
|
62
86
|
for instance in instances.values():
|
|
63
|
-
print(f"[DEBUG] Instance {instance.env_name} ports: {instance.ports}", flush=True)
|
|
64
87
|
for var_name, port in instance.ports.items():
|
|
65
88
|
env[var_name] = str(port)
|
|
66
|
-
# Pass project name so env_seed.py can use it for docker commands
|
|
67
89
|
env_name_upper = instance.env_name.upper().replace("-", "_")
|
|
68
90
|
env[f"{env_name_upper}_PROJECT_NAME"] = instance.project_name
|
|
69
91
|
|
|
70
|
-
# Port range for MCP servers (allocated per-task)
|
|
71
92
|
if args.port_range:
|
|
72
93
|
env["DT_PORT_RANGE"] = args.port_range
|
|
73
94
|
|
|
74
95
|
cmd = [
|
|
75
96
|
sys.executable,
|
|
76
97
|
str(TASK_RUNNER_PATH),
|
|
77
|
-
"--task-dir",
|
|
78
|
-
|
|
79
|
-
"--agent-type",
|
|
80
|
-
args.agent_type,
|
|
81
|
-
"--model",
|
|
82
|
-
args.model,
|
|
98
|
+
"--task-dir", str(task.task_dir),
|
|
99
|
+
"--agent-type", args.agent_type,
|
|
100
|
+
"--model", args.model,
|
|
83
101
|
]
|
|
84
102
|
if args.skip_mcp:
|
|
85
103
|
cmd.append("--skip-mcp")
|
|
@@ -94,14 +112,54 @@ async def _run_single_task_subprocess(
|
|
|
94
112
|
if args.disallowed_tools:
|
|
95
113
|
cmd.extend(["--disallowed-tools"] + args.disallowed_tools)
|
|
96
114
|
|
|
97
|
-
|
|
115
|
+
short_id = _short_task_id(task)
|
|
116
|
+
log_path = _task_log_path(args, task)
|
|
117
|
+
|
|
118
|
+
if ui is not None:
|
|
119
|
+
ui.state.start_task(
|
|
120
|
+
task_id=short_id,
|
|
121
|
+
domain=task.domain or "?",
|
|
122
|
+
environments=task.environments,
|
|
123
|
+
log_path=log_path,
|
|
124
|
+
)
|
|
125
|
+
if args.verbose:
|
|
126
|
+
sys.stdout.write(
|
|
127
|
+
f"[EVAL] {short_id} envs={list(task.environments)} log={log_path}\n"
|
|
128
|
+
)
|
|
129
|
+
sys.stdout.flush()
|
|
130
|
+
|
|
98
131
|
proc = await asyncio.create_subprocess_exec(
|
|
99
132
|
*cmd,
|
|
100
133
|
cwd=str(PROJECT_ROOT),
|
|
101
134
|
env=env,
|
|
135
|
+
stdout=asyncio.subprocess.PIPE,
|
|
136
|
+
stderr=asyncio.subprocess.STDOUT, # merge so the log has the full timeline
|
|
102
137
|
)
|
|
138
|
+
assert proc.stdout is not None
|
|
139
|
+
|
|
140
|
+
with log_path.open("wb") as logf:
|
|
141
|
+
async for line in proc.stdout:
|
|
142
|
+
logf.write(line)
|
|
143
|
+
logf.flush()
|
|
144
|
+
|
|
145
|
+
text = line.decode(errors="replace").rstrip()
|
|
146
|
+
|
|
147
|
+
# Status lines drive the UI state regardless of display mode.
|
|
148
|
+
fields = parse_status_line(text)
|
|
149
|
+
if fields and ui is not None:
|
|
150
|
+
ui.state.update_status(short_id, fields)
|
|
151
|
+
|
|
152
|
+
# Verbose mode echoes raw output to the terminal.
|
|
153
|
+
if args.verbose:
|
|
154
|
+
sys.stdout.write(text + "\n")
|
|
155
|
+
sys.stdout.flush()
|
|
156
|
+
|
|
103
157
|
rc = await proc.wait()
|
|
104
|
-
|
|
158
|
+
if ui is not None:
|
|
159
|
+
ui.state.finish_task(short_id, success=(rc == 0))
|
|
160
|
+
if args.verbose:
|
|
161
|
+
sys.stdout.write(f"[EVAL] {short_id} finished (rc={rc})\n")
|
|
162
|
+
sys.stdout.flush()
|
|
105
163
|
return rc
|
|
106
164
|
|
|
107
165
|
|
|
@@ -223,6 +281,31 @@ async def _run_all_tasks(args: argparse.Namespace) -> int:
|
|
|
223
281
|
base_env = os.environ.copy()
|
|
224
282
|
task_timings: Dict[str, TaskTiming] = {}
|
|
225
283
|
|
|
284
|
+
# Decide between Rich live UI vs raw passthrough.
|
|
285
|
+
# Live UI is the default; --verbose or non-TTY falls back to legacy logs.
|
|
286
|
+
use_live_ui = (not args.verbose) and sys.stdout.isatty()
|
|
287
|
+
|
|
288
|
+
def _ports_in_use() -> list:
|
|
289
|
+
try:
|
|
290
|
+
ports: set = set()
|
|
291
|
+
for inst in getattr(executor, "_all_instances", {}).values():
|
|
292
|
+
for p in (inst.ports or {}).values():
|
|
293
|
+
ports.add(int(p))
|
|
294
|
+
return sorted(ports)
|
|
295
|
+
except Exception:
|
|
296
|
+
return []
|
|
297
|
+
|
|
298
|
+
ui_ctx: Optional[LiveProgress] = None
|
|
299
|
+
if use_live_ui:
|
|
300
|
+
ui_ctx = LiveProgress(
|
|
301
|
+
total_tasks=len(scheduled_tasks),
|
|
302
|
+
agent_type=args.agent_type,
|
|
303
|
+
model=args.model,
|
|
304
|
+
max_parallel=args.max_parallel,
|
|
305
|
+
filters=filter_items,
|
|
306
|
+
port_provider=_ports_in_use,
|
|
307
|
+
)
|
|
308
|
+
|
|
226
309
|
async def run_task(task: ScheduledTask, instances: Dict[str, EnvInstance]) -> int:
|
|
227
310
|
"""Wrapper to run a task and track timing."""
|
|
228
311
|
task_name = task.task_dir.name
|
|
@@ -230,11 +313,12 @@ async def _run_all_tasks(args: argparse.Namespace) -> int:
|
|
|
230
313
|
timing.start_time = time.time()
|
|
231
314
|
|
|
232
315
|
try:
|
|
233
|
-
rc = await _run_single_task_subprocess(task, instances, args, base_env)
|
|
316
|
+
rc = await _run_single_task_subprocess(task, instances, args, base_env, ui=ui_ctx)
|
|
234
317
|
timing.success = (rc == 0)
|
|
235
318
|
return rc
|
|
236
319
|
except Exception as e:
|
|
237
|
-
|
|
320
|
+
if not use_live_ui:
|
|
321
|
+
print(f"[EVAL] Error running task {task_name}: {e}", flush=True)
|
|
238
322
|
timing.success = False
|
|
239
323
|
return 1
|
|
240
324
|
finally:
|
|
@@ -243,9 +327,13 @@ async def _run_all_tasks(args: argparse.Namespace) -> int:
|
|
|
243
327
|
stats.task_timings.append(timing)
|
|
244
328
|
|
|
245
329
|
try:
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
330
|
+
if not use_live_ui:
|
|
331
|
+
print(f"\n[EVAL] Starting evaluation...", flush=True)
|
|
332
|
+
if ui_ctx is not None:
|
|
333
|
+
with ui_ctx:
|
|
334
|
+
results = await executor.run_all(scheduled_tasks, run_task)
|
|
335
|
+
else:
|
|
336
|
+
results = await executor.run_all(scheduled_tasks, run_task)
|
|
249
337
|
|
|
250
338
|
finally:
|
|
251
339
|
# Record end time
|
|
@@ -387,6 +475,15 @@ def main() -> None:
|
|
|
387
475
|
action="store_true",
|
|
388
476
|
help="Enable debug mode to save extra info like tool descriptions in trajectory.",
|
|
389
477
|
)
|
|
478
|
+
parser.add_argument(
|
|
479
|
+
"--verbose",
|
|
480
|
+
action="store_true",
|
|
481
|
+
help=(
|
|
482
|
+
"Stream raw task_runner stdout/stderr to the terminal. "
|
|
483
|
+
"Without this, dtap eval shows a live progress UI and writes "
|
|
484
|
+
"per-task logs to <results>/.../task.log."
|
|
485
|
+
),
|
|
486
|
+
)
|
|
390
487
|
parser.add_argument(
|
|
391
488
|
"--direct-prompt",
|
|
392
489
|
action="store_true",
|
|
@@ -35,6 +35,16 @@ from dt_arena.src.types.agent import AgentConfig, RuntimeConfig
|
|
|
35
35
|
from dt_arena.src.types.task import AttackConfig, TaskConfig
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
def _status(**fields: object) -> None:
|
|
39
|
+
"""Emit a machine-readable status line consumed by the dtap eval progress UI.
|
|
40
|
+
|
|
41
|
+
Format: [DTAP_STATUS] key1=val1 key2=val2 ...
|
|
42
|
+
Always flushes so the parent gets near-real-time updates.
|
|
43
|
+
"""
|
|
44
|
+
parts = " ".join(f"{k}={v}" for k, v in fields.items())
|
|
45
|
+
print(f"[DTAP_STATUS] {parts}", flush=True)
|
|
46
|
+
|
|
47
|
+
|
|
38
48
|
async def run_single_task(
|
|
39
49
|
task_dir: Path,
|
|
40
50
|
*,
|
|
@@ -68,6 +78,7 @@ async def run_single_task(
|
|
|
68
78
|
print(f"[ERROR] Configuration file not found: {config_path}")
|
|
69
79
|
return 1
|
|
70
80
|
|
|
81
|
+
_status(phase="loading")
|
|
71
82
|
print(f"[INFO] Loading agent config from: {config_path}")
|
|
72
83
|
print("-" * 80)
|
|
73
84
|
|
|
@@ -241,6 +252,7 @@ async def run_single_task(
|
|
|
241
252
|
print(attack_cfg.malicious_goal.strip())
|
|
242
253
|
print("-" * 80)
|
|
243
254
|
|
|
255
|
+
_status(phase="running", turns=len(user_instruction) if isinstance(user_instruction, list) else 1)
|
|
244
256
|
# Run agent with per-turn env injection support
|
|
245
257
|
async with agent:
|
|
246
258
|
metadata = {
|
|
@@ -281,6 +293,7 @@ async def run_single_task(
|
|
|
281
293
|
print(f" Error: {inj_result['error']}")
|
|
282
294
|
|
|
283
295
|
# Run agent for this turn
|
|
296
|
+
_status(turn=turn_id, total_turns=len(instructions))
|
|
284
297
|
print(f"\n[TURN {turn_id}] Processing query...")
|
|
285
298
|
try:
|
|
286
299
|
result = await agent.run(turn_instruction, metadata=metadata)
|
|
@@ -315,6 +328,7 @@ async def run_single_task(
|
|
|
315
328
|
|
|
316
329
|
# Run judge
|
|
317
330
|
if not skip_judge:
|
|
331
|
+
_status(phase="judging")
|
|
318
332
|
judge_result = run_judge(task_dir, response=final_output)
|
|
319
333
|
print_judge_result(judge_result)
|
|
320
334
|
|
|
@@ -325,10 +339,12 @@ async def run_single_task(
|
|
|
325
339
|
print(f"[INFO] Judge result saved to: {judge_output_path}")
|
|
326
340
|
|
|
327
341
|
if judge_result.get("task_success") is False:
|
|
342
|
+
_status(phase="done", success=0)
|
|
328
343
|
return 1
|
|
329
344
|
else:
|
|
330
345
|
print("[JUDGE] Skipped judge as requested.")
|
|
331
346
|
|
|
347
|
+
_status(phase="done", success=1)
|
|
332
348
|
return 0
|
|
333
349
|
|
|
334
350
|
except subprocess.CalledProcessError as e:
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "decodingtrust-agent-sdk"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.2.1"
|
|
8
8
|
description = "DecodingTrust Agent Platform (DTap) — A controllable and interactive red-teaming platform for AI agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
File without changes
|
|
File without changes
|
|
File without changes
|