decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
import atexit
|
|
4
|
+
import shutil
|
|
5
|
+
from typing import Dict, Any, Optional
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_env_file(env_file_path: str) -> Dict[str, str]:
    """
    Load environment variables from a .env file.

    Blank lines and lines starting with ``#`` are skipped. Every other line
    that contains ``=`` is split on the first ``=``; key and value are stripped
    of surrounding whitespace, a leading shell-style ``export `` is dropped,
    and one pair of matching single or double quotes around the value is
    removed. Lines without ``=`` or with an empty key are ignored.

    Args:
        env_file_path: Path to the .env file

    Returns:
        Dictionary of environment variables. Empty when the file does not
        exist. If reading fails part-way, a warning is printed and whatever
        was parsed before the failure is returned.
    """
    env_vars: Dict[str, str] = {}

    if not os.path.exists(env_file_path):
        return env_vars

    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(env_file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()

                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue

                # Accept "export KEY=VALUE" as written in shell-sourced files
                if line.startswith('export '):
                    line = line[len('export '):].lstrip()

                # Parse KEY=VALUE (only the first '=' separates key from value)
                key, separator, value = line.partition('=')
                if not separator:
                    continue

                key = key.strip()
                value = value.strip()
                if not key:
                    # "=value" has no usable variable name
                    continue

                # Remove quotes only when there is a real opening/closing pair;
                # the length check keeps a lone quote character intact.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                    value = value[1:-1]

                env_vars[key] = value

        print(f"[DOCKER] Loaded {len(env_vars)} environment variables from {env_file_path}")
    except (OSError, UnicodeError) as e:
        # Best effort: a broken .env file must not prevent the caller from running.
        print(f"[DOCKER] Warning: Failed to load .env file: {e}")

    return env_vars
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class DockerExecutor:
|
|
54
|
+
"""
|
|
55
|
+
Docker executor for running attack algorithm scripts in a persistent container.
|
|
56
|
+
|
|
57
|
+
Can either create a new container or attach to an existing one (for sharing
|
|
58
|
+
across parallel processes).
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
def __init__(
    self,
    image: str = "red-teaming-sandbox:latest",
    skills_dir: str = "dt_arms/attack_skills/attack_algorithms",
    timeout: int = 30,
    memory_limit: str = "2g",
    cpu_quota: int = 100000,
    env_file: str = "dt_arms/docker/.env",
    enable_gpu: bool = True,
    container_id: Optional[str] = None,  # Attach to existing container
):
    """
    Record the executor configuration; the container itself is created lazily.

    Args:
        image: Docker image name
        skills_dir: Path to attack_algorithms directory (stored as an absolute path)
        timeout: Default timeout in seconds for command execution
        memory_limit: Memory limit (e.g., "2g", "512m")
        cpu_quota: CPU quota (100000 = 1 CPU)
        env_file: Path to .env file with environment variables (default: "dt_arms/docker/.env")
        enable_gpu: Whether to enable GPU access in container (default: True)
        container_id: If provided, attach to existing container instead of creating new one
    """
    # Plain configuration values
    self.image = image
    self.skills_dir = os.path.abspath(skills_dir)
    self.timeout = timeout
    self.memory_limit = memory_limit
    self.cpu_quota = cpu_quota
    self.enable_gpu = enable_gpu

    # Variables parsed from the .env file
    self.env_vars = load_env_file(env_file)

    # Handles that are filled in on first use
    self._docker_client = None
    self._container = None
    self._workspace_dir = None

    # Set before any cleanup can run so the flag always exists
    self._cleaned_up = False

    # A caller-supplied container id means the container belongs to someone
    # else: we attach to it and must not tear it down.
    self._shared_container_id = container_id
    owns_container = container_id is None
    self._owns_container = owns_container

    # Only the owner registers cleanup for interpreter exit
    if owns_container:
        atexit.register(self.cleanup)
|
|
115
|
+
|
|
116
|
+
@property
|
|
117
|
+
def docker_client(self):
|
|
118
|
+
"""Lazy-load Docker client."""
|
|
119
|
+
if self._docker_client is None:
|
|
120
|
+
try:
|
|
121
|
+
import docker
|
|
122
|
+
self._docker_client = docker.from_env()
|
|
123
|
+
except Exception as e:
|
|
124
|
+
raise RuntimeError(
|
|
125
|
+
f"Failed to connect to Docker daemon: {e}\n"
|
|
126
|
+
"Make sure Docker is running."
|
|
127
|
+
)
|
|
128
|
+
return self._docker_client
|
|
129
|
+
|
|
130
|
+
@property
|
|
131
|
+
def container_id(self) -> Optional[str]:
|
|
132
|
+
"""Get the container ID for sharing with other processes."""
|
|
133
|
+
if self._container is not None:
|
|
134
|
+
return self._container.id
|
|
135
|
+
return self._shared_container_id
|
|
136
|
+
|
|
137
|
+
def start_shared_container(self) -> str:
|
|
138
|
+
"""
|
|
139
|
+
Start the container and return its ID for sharing with subprocesses.
|
|
140
|
+
|
|
141
|
+
This is useful for the orchestrator to create a container once,
|
|
142
|
+
then pass the ID to worker subprocesses.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
Container ID string
|
|
146
|
+
"""
|
|
147
|
+
self._ensure_container()
|
|
148
|
+
return self._container.id
|
|
149
|
+
|
|
150
|
+
def __enter__(self):
|
|
151
|
+
"""Context manager entry."""
|
|
152
|
+
return self
|
|
153
|
+
|
|
154
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
155
|
+
"""Context manager exit - automatic cleanup."""
|
|
156
|
+
self.cleanup()
|
|
157
|
+
|
|
158
|
+
def _ensure_container(self):
|
|
159
|
+
"""
|
|
160
|
+
Ensure the persistent container is running.
|
|
161
|
+
Creates and starts it if it doesn't exist, or attaches to existing if container_id provided.
|
|
162
|
+
"""
|
|
163
|
+
if self._container is not None:
|
|
164
|
+
# Check if container is still running
|
|
165
|
+
try:
|
|
166
|
+
self._container.reload()
|
|
167
|
+
if self._container.status == "running":
|
|
168
|
+
return # Container is running
|
|
169
|
+
else:
|
|
170
|
+
# Container stopped, restart it (only if we own it)
|
|
171
|
+
if self._owns_container:
|
|
172
|
+
print(f"[DOCKER] Container stopped, restarting...")
|
|
173
|
+
self._container.start()
|
|
174
|
+
return
|
|
175
|
+
except Exception as e:
|
|
176
|
+
# Container doesn't exist or error
|
|
177
|
+
if self._owns_container:
|
|
178
|
+
print(f"[DOCKER] Container error: {e}, recreating...")
|
|
179
|
+
self._container = None
|
|
180
|
+
else:
|
|
181
|
+
raise RuntimeError(f"Shared container {self._shared_container_id} not available: {e}")
|
|
182
|
+
|
|
183
|
+
# If attaching to existing container
|
|
184
|
+
if self._shared_container_id:
|
|
185
|
+
try:
|
|
186
|
+
self._container = self.docker_client.containers.get(self._shared_container_id)
|
|
187
|
+
self._container.reload()
|
|
188
|
+
if self._container.status != "running":
|
|
189
|
+
raise RuntimeError(f"Shared container {self._shared_container_id} is not running")
|
|
190
|
+
print(f"[DOCKER] Attached to shared container: {self._container.short_id}")
|
|
191
|
+
return
|
|
192
|
+
except Exception as e:
|
|
193
|
+
raise RuntimeError(f"Failed to attach to shared container {self._shared_container_id}: {e}")
|
|
194
|
+
|
|
195
|
+
# Create workspace directory if needed
|
|
196
|
+
if self._workspace_dir is None:
|
|
197
|
+
self._workspace_dir = tempfile.mkdtemp(prefix="arms_workspace_")
|
|
198
|
+
|
|
199
|
+
# Verify skills directory exists
|
|
200
|
+
if not os.path.exists(self.skills_dir):
|
|
201
|
+
raise RuntimeError(f"Skills directory not found: {self.skills_dir}")
|
|
202
|
+
|
|
203
|
+
# Create and start persistent container
|
|
204
|
+
print(f"[DOCKER] Creating persistent container...")
|
|
205
|
+
if self.env_vars:
|
|
206
|
+
print(f"[DOCKER] Passing {len(self.env_vars)} environment variables to container")
|
|
207
|
+
if self.enable_gpu:
|
|
208
|
+
print(f"[DOCKER] GPU access enabled")
|
|
209
|
+
|
|
210
|
+
try:
|
|
211
|
+
# Prepare container configuration
|
|
212
|
+
container_config = {
|
|
213
|
+
"image": self.image,
|
|
214
|
+
"command": ["tail", "-f", "/dev/null"], # Keep container running
|
|
215
|
+
"volumes": {
|
|
216
|
+
self.skills_dir: {
|
|
217
|
+
"bind": "/skills",
|
|
218
|
+
"mode": "ro" # Read-only
|
|
219
|
+
},
|
|
220
|
+
self._workspace_dir: {
|
|
221
|
+
"bind": "/workspace",
|
|
222
|
+
"mode": "rw" # Read-write
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
"working_dir": "/workspace",
|
|
226
|
+
"network_mode": "host",
|
|
227
|
+
"mem_limit": self.memory_limit,
|
|
228
|
+
"cpu_quota": self.cpu_quota,
|
|
229
|
+
"environment": self.env_vars, # Pass environment variables from .env file
|
|
230
|
+
"detach": True,
|
|
231
|
+
"remove": False,
|
|
232
|
+
"stdout": True,
|
|
233
|
+
"stderr": True,
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
# Add GPU support if enabled
|
|
237
|
+
if self.enable_gpu:
|
|
238
|
+
container_config["device_requests"] = [
|
|
239
|
+
{
|
|
240
|
+
"driver": "nvidia",
|
|
241
|
+
"count": -1, # -1 means all GPUs
|
|
242
|
+
"capabilities": [["gpu", "utility", "compute"]],
|
|
243
|
+
}
|
|
244
|
+
]
|
|
245
|
+
|
|
246
|
+
self._container = self.docker_client.containers.run(**container_config)
|
|
247
|
+
print(f"[DOCKER] Container created: {self._container.short_id}")
|
|
248
|
+
except Exception as e:
|
|
249
|
+
error_msg = str(e)
|
|
250
|
+
if self.enable_gpu and ("could not select device driver" in error_msg.lower() or "nvidia" in error_msg.lower()):
|
|
251
|
+
print(f"[DOCKER] Warning: GPU access failed, falling back to CPU-only mode")
|
|
252
|
+
print(f"[DOCKER] GPU Error: {error_msg}")
|
|
253
|
+
# Retry without GPU
|
|
254
|
+
if "device_requests" in container_config:
|
|
255
|
+
del container_config["device_requests"]
|
|
256
|
+
self._container = self.docker_client.containers.run(**container_config)
|
|
257
|
+
print(f"[DOCKER] Container created (CPU-only): {self._container.short_id}")
|
|
258
|
+
else:
|
|
259
|
+
raise RuntimeError(f"Failed to create Docker container: {e}")
|
|
260
|
+
|
|
261
|
+
def execute(
|
|
262
|
+
self,
|
|
263
|
+
skill_name: str,
|
|
264
|
+
command: str,
|
|
265
|
+
timeout: Optional[int] = None,
|
|
266
|
+
) -> Dict[str, Any]:
|
|
267
|
+
"""
|
|
268
|
+
Execute a command in the persistent Docker container within the skill's directory.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
skill_name: Name of the skill (e.g., "emoji-attack")
|
|
272
|
+
command: Command to execute (e.g., 'python run_attack.py "text"')
|
|
273
|
+
timeout: Timeout in seconds (overrides default if provided)
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
Dictionary with:
|
|
277
|
+
- stdout: Command output
|
|
278
|
+
- stderr: Error output
|
|
279
|
+
- exit_code: Exit code (0 = success)
|
|
280
|
+
- error: Error message if execution failed
|
|
281
|
+
- timeout: Whether execution timed out
|
|
282
|
+
"""
|
|
283
|
+
timeout = timeout or self.timeout
|
|
284
|
+
|
|
285
|
+
# Verify skills directory exists
|
|
286
|
+
if not os.path.exists(self.skills_dir):
|
|
287
|
+
return {
|
|
288
|
+
"stdout": "",
|
|
289
|
+
"stderr": f"Skills directory not found: {self.skills_dir}",
|
|
290
|
+
"exit_code": -1,
|
|
291
|
+
"error": f"Skills directory not found: {self.skills_dir}",
|
|
292
|
+
"timeout": False
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
# Skill directory path
|
|
296
|
+
skill_dir = Path(self.skills_dir) / skill_name
|
|
297
|
+
if not skill_dir.exists():
|
|
298
|
+
return {
|
|
299
|
+
"stdout": "",
|
|
300
|
+
"stderr": f"Skill directory not found: {skill_dir}",
|
|
301
|
+
"exit_code": -1,
|
|
302
|
+
"error": f"Skill directory not found: {skill_dir}",
|
|
303
|
+
"timeout": False
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
try:
|
|
307
|
+
# Ensure persistent container is running
|
|
308
|
+
self._ensure_container()
|
|
309
|
+
|
|
310
|
+
# Execute command in the running container
|
|
311
|
+
# Use bash to cd into skill directory and run command
|
|
312
|
+
exec_command = f"cd /skills/{skill_name} && {command}"
|
|
313
|
+
|
|
314
|
+
# Use demux=True to separate stdout and stderr
|
|
315
|
+
exit_code, output = self._container.exec_run(
|
|
316
|
+
cmd=["bash", "-c", exec_command],
|
|
317
|
+
stdout=True,
|
|
318
|
+
stderr=True,
|
|
319
|
+
demux=True,
|
|
320
|
+
workdir=f"/skills/{skill_name}",
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
# output is a tuple of (stdout_bytes, stderr_bytes) when demux=True
|
|
324
|
+
stdout_bytes, stderr_bytes = output if output else (b"", b"")
|
|
325
|
+
stdout = stdout_bytes.decode('utf-8') if stdout_bytes else ""
|
|
326
|
+
stderr = stderr_bytes.decode('utf-8') if stderr_bytes else ""
|
|
327
|
+
|
|
328
|
+
return {
|
|
329
|
+
"stdout": stdout,
|
|
330
|
+
"stderr": stderr,
|
|
331
|
+
"exit_code": exit_code,
|
|
332
|
+
"error": None if exit_code == 0 else f"Command failed with exit code {exit_code}",
|
|
333
|
+
"timeout": False
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
except Exception as e:
|
|
337
|
+
error_type = type(e).__name__
|
|
338
|
+
error_msg = str(e)
|
|
339
|
+
|
|
340
|
+
# Check if timeout
|
|
341
|
+
if "timeout" in error_msg.lower() or error_type in ["ReadTimeout", "TimeoutError"]:
|
|
342
|
+
return {
|
|
343
|
+
"stdout": "",
|
|
344
|
+
"stderr": f"Command execution timeout ({timeout}s)",
|
|
345
|
+
"exit_code": -1,
|
|
346
|
+
"error": f"Command execution timeout ({timeout}s)",
|
|
347
|
+
"timeout": True
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
# Check if Docker image not found
|
|
351
|
+
if "not found" in error_msg.lower() and "image" in error_msg.lower():
|
|
352
|
+
return {
|
|
353
|
+
"stdout": "",
|
|
354
|
+
"stderr": f"Docker image '{self.image}' not found. Please build it first.",
|
|
355
|
+
"exit_code": -1,
|
|
356
|
+
"error": f"Docker image '{self.image}' not found",
|
|
357
|
+
"timeout": False
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
# General error - fall back to subprocess
|
|
361
|
+
print(f"[DOCKER] Warning: Docker execution failed ({error_type}: {error_msg}), falling back to subprocess")
|
|
362
|
+
return self._execute_subprocess(skill_name, command, timeout)
|
|
363
|
+
|
|
364
|
+
def _execute_subprocess(
    self,
    skill_name: str,
    command: str,
    timeout: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Fallback to subprocess execution (for development without Docker).

    Runs ``command`` through the host shell with
    ``<self.skills_dir>/<skill_name>`` as the working directory and a copy
    of the current process environment.

    NOTE(security): the command is executed with ``shell=True`` directly on
    the host, without any container isolation. Only pass trusted commands.

    Args:
        skill_name: Skill whose directory (under ``self.skills_dir``) is
            used as the working directory.
        command: Shell command line to execute.
        timeout: Maximum run time in seconds. A falsy value (``None`` or
            ``0``) falls back to ``self.timeout``.

    Returns:
        A dict with the keys:
            - ``stdout`` / ``stderr``: captured text output.
            - ``exit_code``: the process return code, or ``-1`` when the
              command could not be run or timed out.
            - ``error``: ``None`` on success (exit code 0), otherwise a
              human-readable description of the failure.
            - ``timeout``: ``True`` only when the time limit was hit.
    """
    import subprocess

    timeout = timeout or self.timeout
    skill_dir = Path(self.skills_dir) / skill_name

    if not skill_dir.exists():
        missing_msg = f"Skill directory not found: {skill_dir}"
        return {
            "stdout": "",
            "stderr": missing_msg,
            "exit_code": -1,
            "error": missing_msg,
            "timeout": False
        }

    # Keep the try body limited to the call that can actually raise.
    try:
        result = subprocess.run(
            command,
            shell=True,
            cwd=str(skill_dir),
            capture_output=True,
            text=True,
            timeout=timeout,
            env={**os.environ}
        )
    except subprocess.TimeoutExpired:
        timeout_msg = f"Command execution timeout ({timeout}s)"
        return {
            "stdout": "",
            "stderr": timeout_msg,
            "exit_code": -1,
            "error": timeout_msg,
            "timeout": True
        }
    except Exception as e:
        return {
            "stdout": "",
            "stderr": str(e),
            "exit_code": -1,
            "error": f"Execution failed: {e}",
            "timeout": False
        }

    exit_code = result.returncode
    return {
        "stdout": result.stdout,
        "stderr": result.stderr,
        "exit_code": exit_code,
        # Report non-zero exits the same way the Docker execution path does,
        # so callers see one error contract regardless of backend.
        "error": None if exit_code == 0 else f"Command failed with exit code {exit_code}",
        "timeout": False
    }
|
|
424
|
+
|
|
425
|
+
def cleanup(self):
    """
    Release the Docker resources held by this instance.

    Steps, in order:
      1. Stop and remove the container, but only when this instance owns
         it (a shared container it merely attached to is left running).
      2. Delete the workspace directory, under the same ownership rule.
      3. Close the Docker client.

    Every step is best-effort: a failure is reported with a
    ``[DOCKER] Warning`` line and the remaining steps still run.

    Safe to call multiple times; calls after the first return immediately.
    ``__del__`` uses this as a backup. The original documentation also
    lists atexit and the context manager's ``__exit__`` as callers (their
    registration is not part of this method).
    """
    if self._cleaned_up:
        # An earlier call already ran; nothing left to release.
        return

    # Mark as done up front so a re-entrant call becomes a no-op.
    self._cleaned_up = True

    # --- container -------------------------------------------------------
    container = self._container
    if container is not None and self._owns_container:
        try:
            print(f"[DOCKER] Stopping container {container.short_id}...")
            container.stop(timeout=5)
            container.remove()
            print("[DOCKER] Container removed")
        except Exception as exc:
            print(f"[DOCKER] Warning: Failed to remove container: {exc}")
        self._container = None

    # --- workspace directory ---------------------------------------------
    workspace = self._workspace_dir
    if workspace and os.path.exists(workspace) and self._owns_container:
        try:
            shutil.rmtree(workspace)
            print(f"[DOCKER] Cleaned up workspace: {workspace}")
        except Exception as exc:
            print(f"[DOCKER] Warning: Failed to clean up workspace: {exc}")
        self._workspace_dir = None

    # --- docker client ---------------------------------------------------
    client = self._docker_client
    if client:
        try:
            client.close()
        except Exception as exc:
            print(f"[DOCKER] Warning: Failed to close Docker client: {exc}")
        self._docker_client = None
|
|
470
|
+
|
|
471
|
+
def __del__(self):
    """Destructor - ensure cleanup.

    Calls ``cleanup()`` if it has not run yet. Does nothing when the
    instance was never fully initialised.
    """
    # Note: atexit is more reliable than __del__
    # This is just a backup
    #
    # __del__ is also invoked for objects whose __init__ raised before
    # `_cleaned_up` was assigned. A missing flag is treated as "nothing to
    # clean up" so the finalizer does not raise AttributeError.
    if getattr(self, "_cleaned_up", True):
        return
    self.cleanup()
|