decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
from typing import Any, Dict, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
from .config import INJECTION_MCP_CONFIG_PATH
|
|
4
|
+
from .mcp_helpers import _start_mcp_servers_impl
|
|
5
|
+
from .template_helpers import wait_for_servers_ready
|
|
6
|
+
from .mcp_manager import MCPServerManager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_injection_config(config_data: Dict[str, Any]) -> Dict[str, Any]:
    """Parse RedTeamingAgent injection configuration from config.yaml.

    Sections that are missing, or present but empty (which load as ``None``),
    are treated as empty mappings, so the function always returns the full
    result dict with its defaults.

    Args:
        config_data: The full config data loaded from config.yaml

    Returns:
        Dict with parsed injection configuration:
        {
            "prompt_enabled": bool,
            "tool_enabled": bool,
            "environment_enabled": bool,
            "skill_enabled": bool,
            "skill_modes": ["insert", "append", "create"] (subset, in that order),
            "environment_servers": {
                "server_name": ["tool1", "tool2"] or "all"
            }
        }
    """
    all_skill_modes = ("insert", "append", "create")

    # A key that exists with no value yields None rather than the .get()
    # default, so normalise with `or {}` before chaining further lookups.
    rt_config = (config_data or {}).get("RedTeamingAgent") or {}
    available = rt_config.get("available_injections") or {}

    # "skill" is either a plain bool (all modes on/off) or a dict that
    # switches individual modes on; anything else disables skill injection.
    skill_config = available.get("skill", False)
    if isinstance(skill_config, bool):
        skill_modes = list(all_skill_modes) if skill_config else []
    elif isinstance(skill_config, dict):
        skill_modes = [
            mode for mode in all_skill_modes if skill_config.get(mode, False)
        ]
    else:
        skill_modes = []

    return {
        "prompt_enabled": bool(available.get("prompt", False)),
        "tool_enabled": bool(available.get("tool", False)),
        "environment_enabled": bool(available.get("environment", False)),
        "skill_enabled": bool(skill_modes),
        "skill_modes": skill_modes,
        # Server configuration lives in its own section, separate from the
        # "environment" on/off flag above.
        "environment_servers": rt_config.get("env_injection_config") or {},
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def start_injection_mcp_servers(
    injection_config: Dict[str, Any],
    resource_manager: Any = None,
    task_id: Optional[str] = None,
    host: str = "127.0.0.1"
) -> Tuple[Optional[MCPServerManager], Dict[str, Any]]:
    """Start the environment injection MCP servers named in the config.

    Nothing is started (and ``injection_config`` is returned untouched) when
    ``environment_servers`` is empty or when ``INJECTION_MCP_CONFIG_PATH``
    does not exist; the latter case also prints a warning.

    Otherwise the server names are handed to ``_start_mcp_servers_impl`` and
    ``injection_config["environment_servers"]`` is replaced in place so that
    each entry becomes ``{"tools": <original value>, "url": <server url>}``.
    A server with no reported URL gets ``""``.

    Args:
        injection_config: Injection configuration with environment_servers dict
        resource_manager: Forwarded as ``resource_mgr`` (optional)
        task_id: Unique task identifier; "injection" is used when falsy
        host: Host address for MCP server URLs (default: 127.0.0.1)

    Returns:
        Tuple of (MCPServerManager instance or None, updated injection_config)
    """
    requested = injection_config.get("environment_servers", {})
    if not requested:
        return None, injection_config

    if not INJECTION_MCP_CONFIG_PATH.exists():
        print(f"[WARN] Injection MCP config not found: {INJECTION_MCP_CONFIG_PATH}")
        return None, injection_config

    manager, urls_by_server = _start_mcp_servers_impl(
        config_path=str(INJECTION_MCP_CONFIG_PATH),
        server_list=list(requested),
        resource_mgr=resource_manager,
        task_id=task_id or "injection",
        host=host,
        prefix="injection",
        log_prefix="[INJECTION MCP]",
    )

    # Attach each server's URL next to its tool selection so later steps can
    # find both in one place.
    injection_config["environment_servers"] = {
        name: {"tools": tools, "url": urls_by_server.get(name, "")}
        for name, tools in requested.items()
    }
    return manager, injection_config
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def wait_for_injection_mcp_ready(
    injection_config: Dict[str, Any],
    timeout: float = 30.0,
    interval: float = 0.5
) -> None:
    """Block until the injection MCP servers report ready.

    Returns immediately when ``environment_servers`` is missing or empty.
    Otherwise the URLs are collected and passed, with ``timeout`` and
    ``interval``, to ``wait_for_servers_ready``.

    Args:
        injection_config: The injection configuration dict with environment_servers
            containing {"server_name": {"url": "http://...", "tools": ...}}
        timeout: Maximum time to wait in seconds
        interval: Polling interval in seconds
    """
    servers = injection_config.get("environment_servers", {})
    if not servers:
        return

    # Only entries already in the {"url": ..., "tools": ...} shape carry a
    # URL; anything else is skipped.
    urls = {
        name: info["url"]
        for name, info in servers.items()
        if isinstance(info, dict) and "url" in info
    }

    wait_for_servers_ready(urls, timeout, interval)
|
utils/judge_helpers.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Dict, Any, List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def load_judge_module(task_dir: Path):
    """
    Dynamically load the judge.py module from a task directory.

    The module is registered in ``sys.modules`` as ``task_judge_<dirname>``
    before its code runs, so code that looks itself up through
    ``sys.modules[__name__]`` works as under a normal import. A later load
    from a directory with the same name replaces that entry. If executing
    the module raises, the entry is removed again and the exception
    propagates.

    While the module executes, two extra entries are placed at the front of
    ``sys.path``: ``<root>/dt_arena/src/types`` and ``<root>``. ``<root>`` is
    the nearest of the task directory's first six ancestors that contains
    ``dt_arena/src/types``, or the sixth ancestor if none does. Both entries
    are removed afterwards.

    Args:
        task_dir: Path to task directory containing judge.py

    Returns:
        Loaded module or None if not found

    Raises:
        Whatever judge.py raises at import time.
    """
    judge_path = task_dir / "judge.py"
    if not judge_path.exists():
        return None

    unique_name = f"task_judge_{task_dir.name}"
    spec = importlib.util.spec_from_file_location(unique_name, str(judge_path))
    if spec is None or spec.loader is None:
        return None

    module = importlib.util.module_from_spec(spec)

    # Walk up at most six levels looking for the directory that holds
    # dt_arena/src/types; if none matches, the last ancestor visited is used.
    root_dir = task_dir
    for _ in range(6):
        root_dir = root_dir.parent
        if (root_dir / "dt_arena" / "src" / "types").exists():
            break

    types_path = str(root_dir / "dt_arena" / "src" / "types")
    root_path = str(root_dir)

    sys.path.insert(0, types_path)
    sys.path.insert(1, root_path)

    # Register before executing, as the importlib "import a source file
    # directly" recipe does; otherwise sys.modules[__name__] lookups made
    # while the module body runs would fail.
    sys.modules[unique_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by name.
        sys.modules.pop(unique_name, None)
        raise
    finally:
        # Clean up added paths
        for added_path in (types_path, root_path):
            if added_path in sys.path:
                sys.path.remove(added_path)

    return module
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def run_judge(
    task_dir: Path,
    response: str = "",
    *,
    check_task: bool = True,
    check_attack: bool = True,
) -> Dict[str, Any]:
    """
    Run the judge for a task.

    Supports the class-based Judge interface with eval_task() and eval_attack() methods.
    The Judge class is expected to have:
    - eval_task(agent_responses: List[str]) -> Tuple[bool, Dict]
    - eval_attack(agent_responses: List[str]) -> Optional[Tuple[bool, Dict]]

    A judge that returns ``None`` as its metadata is treated as having
    returned an empty dict, so its verdict is kept rather than being turned
    into a failure. Exceptions raised by eval_task / eval_attack are caught:
    the corresponding ``*_success`` is set to False and the exception text is
    stored in ``*_message``. Any other failure (loading the module,
    instantiating Judge, preparing responses) is reported through ``error``.

    Note:
        DEPRECATED: The `trajectory` parameter has been removed.
        This parameter is no longer supported.

    Args:
        task_dir: Path to task directory
        response: Agent's final response/output. A string is wrapped in a
            one-element list; any other iterable is converted with list().
        check_task: Whether to run eval_task check
        check_attack: Whether to run eval_attack check

    Returns:
        Dict with judge results:
        {
            "task_success": bool or None,
            "task_message": str,
            "task_metadata": dict,
            "attack_success": bool or None,
            "attack_message": str,
            "attack_metadata": dict,
            "error": str or None
        }
    """
    result: Dict[str, Any] = {
        "task_success": None,
        "task_message": "",
        "task_metadata": {},
        "attack_success": None,
        "attack_message": "",
        "attack_metadata": {},
        "error": None,
    }

    try:
        module = load_judge_module(task_dir)
        if module is None:
            result["error"] = f"judge.py not found in {task_dir}"
            return result

        # The judge must be a class literally named "Judge" that exposes
        # both evaluation methods.
        judge_class = getattr(module, "Judge", None)
        if not (
            isinstance(judge_class, type)
            and hasattr(judge_class, "eval_task")
            and hasattr(judge_class, "eval_attack")
        ):
            result["error"] = f"Judge class with eval_task/eval_attack not found in {task_dir}/judge.py"
            return result

        # Instantiate the judge
        judge = judge_class()

        # Prepare agent_responses as a list
        agent_responses: List[str] = [response] if isinstance(response, str) else list(response)

        # Run eval_task check
        if check_task:
            try:
                success, metadata = judge.eval_task(agent_responses)
                # Normalise missing metadata first, so reading the message
                # cannot fail after the verdict has been recorded.
                metadata = {} if metadata is None else metadata
                result["task_success"] = success
                result["task_metadata"] = metadata
                result["task_message"] = metadata.get("message", "")
            except Exception as e:
                result["task_success"] = False
                result["task_message"] = f"Error running eval_task: {e}"

        # Run eval_attack check
        if check_attack:
            try:
                attack_result = judge.eval_attack(agent_responses)
                # If None, attack evaluation is not applicable (benign task)
                if attack_result is not None:
                    success, metadata = attack_result
                    metadata = {} if metadata is None else metadata
                    result["attack_success"] = success
                    result["attack_metadata"] = metadata
                    result["attack_message"] = metadata.get("message", "")
            except Exception as e:
                result["attack_success"] = False
                result["attack_message"] = f"Error running eval_attack: {e}"

    except Exception as e:
        result["error"] = f"Failed to load/run judge: {e}"

    return result
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def print_judge_result(result: Dict[str, Any]) -> None:
    """Pretty print judge results.

    Prints a banner, then either the judge error (and nothing else) or the
    task and attack verdicts followed by a closing rule. A verdict section is
    skipped when its ``*_success`` value is None or the key is absent, so a
    partially filled result dict can be printed without raising.

    Args:
        result: Result dict in the shape produced by run_judge.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("[JUDGE RESULTS]")
    print(rule)

    if result.get("error"):
        print(f"❌ Judge Error: {result['error']}")
        return

    # Task success
    task_success = result.get("task_success")
    if task_success is not None:
        icon = "✅" if task_success else "❌"
        print(f"{icon} Task Success: {task_success}")
        print(f" Message: {result.get('task_message', '')}")

    # Attack success (for adversarial tasks)
    attack_success = result.get("attack_success")
    if attack_success is not None:
        icon = "⚠️" if attack_success else "🛡️"
        status = "ATTACK SUCCEEDED" if attack_success else "ATTACK BLOCKED"
        print(f"{icon} Attack Check: {status}")
        print(f" Message: {result.get('attack_message', '')}")

    print(rule)
|
|
181
|
+
|