decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
utils/agent_helpers.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional, Type
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Tool names that get_default_disallowed_tools() returns (as a copy) when the
# domain is "os-filesystem" and the agent type is "claudesdk".
OS_FILESYSTEM_CLAUDE_SDK_DISALLOWED_TOOLS = [
    "Bash",
    "Read",
    "Write",
    "Edit",
    "MultiEdit",
    "Glob",
    "Grep",
    "NotebookEdit",
    "AskUserQuestion",
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Tool and tool-group identifiers that get_default_disallowed_tools() returns
# (as a copy) when the domain is "os-filesystem" and the agent type is
# "openclaw".
OS_FILESYSTEM_OPENCLAW_DISALLOWED_TOOLS = [
    "group:fs",
    "group:runtime",
    "group:web",
    "group:memory",
    "group:ui",
    "group:sessions",
    "group:automation",
    "group:messaging",
    "agents_list",
    "image",
    "nodes",
    "tts",
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_default_disallowed_tools(agent_type: Optional[str], domain: Optional[str]) -> Optional[List[str]]:
    """Look up the default list of native tools to disable for an agent/domain pair.

    Both arguments are compared case-insensitively; ``None`` or an empty
    string is treated as "no value".

    Args:
        agent_type: Agent implementation name (e.g. "claudesdk", "openclaw").
        domain: Benchmark domain name.

    Returns:
        A fresh copy of the matching tool list, or ``None`` when the domain is
        not "os-filesystem" or the agent type has no defaults.
    """
    normalized_domain = domain.lower() if domain else ""
    if normalized_domain != "os-filesystem":
        return None

    defaults_by_agent = {
        "claudesdk": OS_FILESYSTEM_CLAUDE_SDK_DISALLOWED_TOOLS,
        "openclaw": OS_FILESYSTEM_OPENCLAW_DISALLOWED_TOOLS,
    }
    normalized_agent = agent_type.lower() if agent_type else ""
    defaults = defaults_by_agent.get(normalized_agent)

    # Hand back a copy so callers cannot mutate the module-level constant.
    return None if defaults is None else list(defaults)
|
|
44
|
+
|
|
45
|
+
# Placeholders for project types and agent classes. Each stays None until
# _ensure_imports() binds it to the imported class; the optional agent
# classes remain None when their import raises ImportError.
_AgentConfig = None
_RuntimeConfig = None
_TaskConfig = None
_AttackConfig = None
_OpenAISDKAgent = None
_LangChainAgent = None
_ClaudeSDKAgent = None
_GoogleADKAgent = None
_MCPReactAgent = None
_OpenClawAgent = None

# Native agent wrappers (lazy imports); also bound by _ensure_imports().
_OpenAISDKNativeWrapper = None
_MCPReactAgentNativeWrapper = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _ensure_imports():
    """Lazily bind project types, agent classes and native wrappers to module globals.

    The core config types (AgentConfig, RuntimeConfig, TaskConfig,
    AttackConfig) are imported without a guard, so an ImportError raised while
    importing them propagates to the caller.

    Every agent implementation and native wrapper is optional: if its import
    raises ImportError the corresponding global stays ``None`` and a warning
    is printed. A global that is still ``None`` is retried on each call, so a
    dependency that becomes importable later is picked up.

    The warning for a given component is printed only once per process. This
    function is reached several times per ``build_agent()`` call (directly and
    via ``_get_agent_registry()``), and repeating the same message on every
    retry would only add noise.
    """
    global _AgentConfig, _RuntimeConfig, _TaskConfig, _AttackConfig
    global _OpenAISDKAgent, _LangChainAgent, _ClaudeSDKAgent, _GoogleADKAgent, _MCPReactAgent, _OpenClawAgent
    global _OpenAISDKNativeWrapper, _MCPReactAgentNativeWrapper

    # Labels already reported. Stored on the function object so that no extra
    # module-level global has to be introduced.
    already_warned = _ensure_imports.__dict__.setdefault("_warned_labels", set())

    def _warn_unavailable(label, err):
        """Print the availability warning for ``label`` the first time only."""
        if label in already_warned:
            return
        already_warned.add(label)
        print(f"[WARNING] {label} not available: {err}")

    if _AgentConfig is None:
        from dt_arena.src.types.agent import AgentConfig, RuntimeConfig
        from dt_arena.src.types.task import AttackConfig, TaskConfig
        _AgentConfig = AgentConfig
        _RuntimeConfig = RuntimeConfig
        _TaskConfig = TaskConfig
        _AttackConfig = AttackConfig

    # Import OpenAI SDK agent
    if _OpenAISDKAgent is None:
        try:
            from agent.openaisdk import OpenAISDKAgent
            _OpenAISDKAgent = OpenAISDKAgent
        except ImportError as e:
            _warn_unavailable("OpenAI SDK agent", e)

    # Import LangChain agent
    if _LangChainAgent is None:
        try:
            from agent.langchain import LangChainAgent
            _LangChainAgent = LangChainAgent
        except ImportError as e:
            _warn_unavailable("LangChain agent", e)

    # Import Claude SDK agent
    if _ClaudeSDKAgent is None:
        try:
            from agent.claudesdk import ClaudeSDKAgent
            _ClaudeSDKAgent = ClaudeSDKAgent
        except ImportError as e:
            _warn_unavailable("Claude SDK agent", e)

    # Import Google ADK agent
    if _GoogleADKAgent is None:
        try:
            from agent.googleadk import GoogleADKAgent
            _GoogleADKAgent = GoogleADKAgent
        except ImportError as e:
            _warn_unavailable("Google ADK agent", e)

    # Import PocketFlow agent
    if _MCPReactAgent is None:
        try:
            from agent.pocketflow import MCPReactAgent
            _MCPReactAgent = MCPReactAgent
        except ImportError as e:
            _warn_unavailable("PocketFlow agent", e)

    # Import OpenClaw agent
    if _OpenClawAgent is None:
        try:
            from agent.openclaw import OpenClawAgent
            _OpenClawAgent = OpenClawAgent
        except ImportError as e:
            _warn_unavailable("OpenClaw agent", e)

    # Import native agent wrappers
    if _OpenAISDKNativeWrapper is None:
        try:
            from agent.openaisdk.src.agent_wrapper import OpenAISDKNativeWrapper
            _OpenAISDKNativeWrapper = OpenAISDKNativeWrapper
        except ImportError as e:
            _warn_unavailable("OpenAI SDK native wrapper", e)

    if _MCPReactAgentNativeWrapper is None:
        try:
            from agent.pocketflow.src.agent_wrapper import MCPReactAgentNativeWrapper
            _MCPReactAgentNativeWrapper = MCPReactAgentNativeWrapper
        except ImportError as e:
            _warn_unavailable("PocketFlow native wrapper", e)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _get_agent_registry() -> Dict[str, Type]:
    """Build a name -> agent class mapping of the agents that imported successfully.

    Triggers the lazy imports first, then includes only the agent classes
    whose module-level placeholder is no longer ``None``. A new dict is
    returned on every call.
    """
    _ensure_imports()

    # Listed in the order the keys should appear in the resulting dict.
    candidates = (
        ("openaisdk", _OpenAISDKAgent),
        ("langchain", _LangChainAgent),
        ("claudesdk", _ClaudeSDKAgent),
        ("googleadk", _GoogleADKAgent),
        ("pocketflow", _MCPReactAgent),
        ("openclaw", _OpenClawAgent),
    )
    return {name: agent_cls for name, agent_cls in candidates if agent_cls is not None}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Agent registry - lazily populated
# NOTE(review): nothing in the visible code writes to this dict;
# _get_agent_registry() builds and returns a fresh dict on each call.
# Confirm whether any caller still reads AGENT_REGISTRY directly.
AGENT_REGISTRY: Dict[str, Type] = {}
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def build_agent(
    agent_type: Optional[str] = None,
    agent_cfg: Optional[Any] = None,  # AgentConfig
    runtime_cfg: Optional[Any] = None,  # RuntimeConfig
    *,
    native_agent: Optional[Any] = None,  # Pre-built native SDK agent
) -> Any:
    """Create an agent, either from a registered type or by wrapping a native agent.

    Two modes are supported:

    * ``native_agent`` given: the object is handed to ``_wrap_native_agent``,
      which detects its framework and returns the matching wrapper.
      ``agent_type`` is not consulted in this mode.
    * otherwise: ``agent_type`` (case-insensitive) is looked up in the agent
      registry and the class is instantiated with the two configs.

    Args:
        agent_type: Registry key such as "openaisdk" or "langchain". Required
            when ``native_agent`` is not supplied.
        agent_cfg: AgentConfig instance, passed through to the agent or wrapper.
        runtime_cfg: RuntimeConfig instance. A default ``RuntimeConfig()`` is
            created when omitted.
        native_agent: Pre-built native SDK agent to wrap.

    Returns:
        The constructed agent (or wrapper) instance.

    Raises:
        ValueError: If neither ``agent_type`` nor ``native_agent`` is given,
            if ``agent_type`` is not in the registry, or if the native agent's
            framework cannot be detected or has no wrapper available.
        NotImplementedError: If the native agent belongs to a framework whose
            wrapper is not implemented yet.
    """
    _ensure_imports()

    # Fall back to a default runtime configuration when none was supplied.
    effective_runtime_cfg = _RuntimeConfig() if runtime_cfg is None else runtime_cfg

    # Wrapping mode takes precedence over building from a registry entry.
    if native_agent is not None:
        return _wrap_native_agent(
            native_agent=native_agent,
            agent_cfg=agent_cfg,
            runtime_cfg=effective_runtime_cfg,
        )

    if agent_type is None:
        raise ValueError(
            "Either 'agent_type' or 'native_agent' must be provided. "
            "Use agent_type to build from scratch, or native_agent to wrap "
            "a pre-built agent."
        )

    available = _get_agent_registry()
    agent_type = agent_type.lower()
    if agent_type not in available:
        raise ValueError(f"Unknown agent type '{agent_type}'. Available: {list(available.keys())}")

    agent_cls = available[agent_type]
    return agent_cls(agent_config=agent_cfg, runtime_config=effective_runtime_cfg)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _wrap_native_agent(
    native_agent: Any,
    agent_cfg: Optional[Any],
    runtime_cfg: Any,
) -> Any:
    """
    Wrap a pre-built native SDK agent so it can be evaluated.

    The framework is determined with ``detect_native_framework`` and the
    matching wrapper class (bound by ``_ensure_imports``) is instantiated.

    Args:
        native_agent: Pre-built agent from a native SDK
        agent_cfg: AgentConfig forwarded to the wrapper as ``agent_config``
        runtime_cfg: RuntimeConfig forwarded to the wrapper as ``runtime_config``

    Returns:
        Wrapped agent instance

    Raises:
        ValueError: If the framework cannot be detected, is unknown, or its
            wrapper class failed to import
        NotImplementedError: If the framework is "langchain", "claudesdk" or
            "googleadk", whose wrappers do not exist yet
    """
    from utils.agent_wrapper import detect_native_framework

    framework = detect_native_framework(native_agent)
    if framework is None:
        raise ValueError(
            f"Could not detect framework for agent of type {type(native_agent).__name__}. "
            "Supported native agent types: OpenAI SDK Agent, LangChain Agent, "
            "Claude SDK Client, Google ADK Agent, PocketFlow NativeMCPReactAgent."
        )

    # Resolve the wrapper class and the message to use if it failed to import.
    if framework == "openaisdk":
        wrapper_cls = _OpenAISDKNativeWrapper
        unavailable_msg = (
            "OpenAI SDK native wrapper is not available. "
            "Please ensure 'agents' package is installed."
        )
    elif framework in ("langchain", "claudesdk", "googleadk"):
        # TODO: Implement LangChainNativeWrapper, ClaudeSDKNativeWrapper
        # and GoogleADKNativeWrapper
        raise NotImplementedError(
            f"Native agent wrapper for '{framework}' is not yet implemented. "
            "Currently supported: openaisdk, pocketflow"
        )
    elif framework == "pocketflow":
        wrapper_cls = _MCPReactAgentNativeWrapper
        unavailable_msg = (
            "PocketFlow native wrapper is not available. "
            "Please ensure 'pocketflow' package is installed."
        )
    else:
        raise ValueError(f"No wrapper available for framework '{framework}'.")

    if wrapper_cls is None:
        raise ValueError(unavailable_msg)

    return wrapper_cls(
        native_agent=native_agent,
        agent_config=agent_cfg,
        runtime_config=runtime_cfg,
    )
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def get_agent_choices() -> List[str]:
    """Return the sorted names of the agent types that could be imported.

    The lazy imports are triggered by ``_get_agent_registry()`` itself, so
    the import pass (and any availability warning it prints) runs only once
    per call here.

    Returns:
        Sorted list of registry keys, e.g. "claudesdk", "openaisdk".
    """
    return sorted(_get_agent_registry())
|
utils/agent_wrapper.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Native Agent Detection Utilities
|
|
3
|
+
|
|
4
|
+
This module provides helper functions for detecting the framework of
|
|
5
|
+
pre-built native agents. The actual wrapper classes are located in
|
|
6
|
+
their respective agent folders (e.g., agent/openaisdk/src/agent_wrapper.py).
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
from utils.agent_wrapper import detect_native_framework
|
|
10
|
+
|
|
11
|
+
native_agent = ... # User's pre-built agent
|
|
12
|
+
framework = detect_native_framework(native_agent)
|
|
13
|
+
# Returns: "openaisdk", "langchain", "claudesdk", "googleadk", or None
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from typing import Any, Optional
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def is_openai_sdk_native_agent(agent: Any) -> bool:
    """
    Report whether ``agent`` looks like an OpenAI Agents SDK agent.

    An instance of ``agents.Agent`` matches directly when that package can be
    imported. Otherwise the object matches if it exposes every attribute in
    the duck-typing list below.

    Args:
        agent: Object to check

    Returns:
        True if the object is, or is shaped like, an OpenAI Agents SDK agent
    """
    # Exact type check, only possible when the 'agents' package is installed.
    try:
        from agents import Agent as OpenAIAgent
    except ImportError:
        pass
    else:
        if isinstance(agent, OpenAIAgent):
            return True

    # Duck-type fallback: all of these attributes must be present.
    required_attrs = (
        'name',
        'instructions',
        'model',
        'mcp_servers',
        'model_settings',
        'clone',
    )
    return all(hasattr(agent, attr) for attr in required_attrs)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def is_langchain_native_agent(agent: Any) -> bool:
    """
    Detect if agent is a native LangChain agent/graph.

    Args:
        agent: Object to check

    Returns:
        True if ``agent`` is an instance of a LangGraph compiled graph class or
        of the legacy ``AgentExecutor`` (when those can be imported), or if it
        exposes ``invoke`` and ``ainvoke`` plus either ``get_graph`` or ``nodes``.
    """
    # Check for LangGraph compiled graphs (from create_agent).
    # Each class is resolved on its own: importing both names in a single
    # ``from ... import A, B`` statement would raise ImportError if either one
    # is missing and thereby skip the check for the class that does exist.
    try:
        from langgraph.graph import state as langgraph_state
    except ImportError:
        langgraph_state = None

    if langgraph_state is not None:
        candidates = (
            getattr(langgraph_state, class_name, None)
            for class_name in ("CompiledGraph", "CompiledStateGraph")
        )
        graph_types = tuple(cls for cls in candidates if isinstance(cls, type))
        # isinstance() with an empty tuple is simply False.
        if isinstance(agent, graph_types):
            return True

    # Check for legacy AgentExecutor
    try:
        from langchain.agents import AgentExecutor
        if isinstance(agent, AgentExecutor):
            return True
    except ImportError:
        pass

    # Duck-type check
    return (
        hasattr(agent, 'invoke') and
        hasattr(agent, 'ainvoke') and
        (hasattr(agent, 'get_graph') or hasattr(agent, 'nodes'))
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_claude_sdk_native_agent(agent: Any) -> bool:
    """
    Detect if agent is a native Claude SDK agent.

    Args:
        agent: Object to check

    Returns:
        True if ``agent`` is an instance of a client class exported by
        ``claude_code_sdk`` (when that package can be imported), or if it
        exposes ``query``, ``receive_response`` and ``_model`` attributes.
    """
    try:
        import claude_code_sdk
    except ImportError:
        claude_code_sdk = None

    if claude_code_sdk is not None:
        # The SDK's client class is ``ClaudeSDKClient``; the previous code
        # imported a ``Client`` name, so the exact-type check was always skipped
        # via ImportError. ``Client`` is still looked up so that any build that
        # does provide it keeps matching.
        candidates = (
            getattr(claude_code_sdk, class_name, None)
            for class_name in ("ClaudeSDKClient", "Client")
        )
        client_types = tuple(cls for cls in candidates if isinstance(cls, type))
        # isinstance() with an empty tuple is simply False.
        if isinstance(agent, client_types):
            return True

    # Duck-type check
    return (
        hasattr(agent, 'query') and
        hasattr(agent, 'receive_response') and
        hasattr(agent, '_model')
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def is_google_adk_native_agent(agent: Any) -> bool:
    """
    Report whether ``agent`` looks like a native Google ADK agent.

    Args:
        agent: Object to check

    Returns:
        True if ``agent`` is an instance of ``google.adk.agents.Agent`` or
        ``LlmAgent`` (when importable), or if it has ``model``, ``instruction``
        and ``tools`` attributes and its class lives in a ``google.adk`` module.
    """
    # Exact type match; skipped silently when google.adk is not installed.
    try:
        from google.adk.agents import Agent as GoogleAgent, LlmAgent
        adk_types = (GoogleAgent, LlmAgent)
        if isinstance(agent, adk_types):
            return True
    except ImportError:
        pass

    # Structural fallback: required attributes first, then the defining module.
    for attr in ('model', 'instruction', 'tools'):
        if not hasattr(agent, attr):
            return False

    agent_type = type(agent)
    return (
        hasattr(agent_type, '__module__') and
        agent_type.__module__.startswith('google.adk')
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def is_pocketflow_native_agent(agent: Any) -> bool:
    """
    Report whether ``agent`` looks like a PocketFlow NativeMCPReactAgent.

    Args:
        agent: Object to check

    Returns:
        True if ``agent`` is an instance of ``NativeMCPReactAgent`` (when it can
        be imported), or if it has ``mcp_servers`` (dict), ``tool_to_server``
        (dict) and ``all_tools`` (list) attributes.
    """
    # Exact type match; skipped silently when agent.pocketflow is unavailable.
    try:
        from agent.pocketflow import NativeMCPReactAgent
        if isinstance(agent, NativeMCPReactAgent):
            return True
    except ImportError:
        pass

    # Structural fallback: attribute name -> container type it must hold.
    expected_shape = (
        ('mcp_servers', dict),
        ('tool_to_server', dict),
        ('all_tools', list),
    )
    if not all(hasattr(agent, attr) for attr, _ in expected_shape):
        return False
    return all(
        isinstance(getattr(agent, attr, None), kind)
        for attr, kind in expected_shape
    )
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def detect_native_framework(agent: Any) -> Optional[str]:
    """
    Identify the framework a pre-built native agent belongs to.

    The per-framework detectors are tried in a fixed order and the first one
    that reports a match wins. A detector that raises is treated as "no match".

    Args:
        agent: A pre-built native SDK agent instance

    Returns:
        Framework name ("openaisdk", "langchain", "claudesdk", "googleadk", "pocketflow")
        or None if not detected
    """
    ordered_checks = (
        ("openaisdk", is_openai_sdk_native_agent),
        ("langchain", is_langchain_native_agent),
        ("claudesdk", is_claude_sdk_native_agent),
        ("googleadk", is_google_adk_native_agent),
        ("pocketflow", is_pocketflow_native_agent),
    )

    for name, matches in ordered_checks:
        try:
            matched = bool(matches(agent))
        except Exception:
            # Best-effort detection: a failing detector must not block the rest.
            continue
        if matched:
            return name

    return None
|
utils/compose_utils.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Optional, Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_project_name(env_name: str) -> Optional[str]:
    """
    Resolve the Docker compose project name for an environment.

    Lookup order:
      1. ``<ENV_NAME>_PROJECT_NAME`` (upper-cased, dashes turned into
         underscores), e.g. GMAIL_PROJECT_NAME, as supplied in pooled mode.
      2. ``COMPOSE_PROJECT_NAME``, suffixed with ``-<env_name>``.

    Args:
        env_name: Environment name (gmail, slack, atlassian, etc.)

    Returns:
        Project name if found, None otherwise
    """
    pooled_key = env_name.upper().replace('-', '_') + "_PROJECT_NAME"
    pooled_name = os.getenv(pooled_key, "")
    if pooled_name:
        return pooled_name

    # Empty and unset are treated the same for both variables.
    base_name = os.getenv("COMPOSE_PROJECT_NAME", "")
    return f"{base_name}-{env_name}" if base_name else None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_project_args(env_name: str) -> List[str]:
    """
    Build the ``-p <project>`` arguments for docker compose.

    Args:
        env_name: Environment name

    Returns:
        List like ["-p", "project_name"] or empty list
    """
    resolved = get_project_name(env_name)
    return ["-p", resolved] if resolved else []
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def run_compose(
    env_name: str,
    compose_file: Union[str, Path],
    args: List[str],
    cwd: Optional[Union[str, Path]] = None,
    check: bool = True,
    verbose: bool = True,
) -> subprocess.CompletedProcess:
    """
    Invoke ``docker compose`` for an environment with its project name applied.

    The command is assembled as
    ``docker compose [-p <project>] -f <compose_file> <args...>``.

    Args:
        env_name: Environment name (gmail, slack, etc.)
        compose_file: Path to docker-compose.yml
        args: Additional arguments for docker compose
        cwd: Working directory
        check: Raise exception on non-zero exit
        verbose: Print command before running

    Returns:
        CompletedProcess instance
    """
    base_cmd = ["docker", "compose"] + get_project_args(env_name)
    file_args = ["-f", str(compose_file)]
    cmd = base_cmd + file_args + args

    if verbose:
        # Echo the command in shell-prompt style before running it.
        print(f"$ {' '.join(cmd)}")

    return subprocess.run(cmd, cwd=cwd, check=check)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def run_compose_exec(
    env_name: str,
    compose_file: Union[str, Path],
    service: str,
    command: List[str],
    cwd: Optional[Union[str, Path]] = None,
    check: bool = True,
    verbose: bool = True,
) -> subprocess.CompletedProcess:
    """
    Execute a command inside a docker compose service.

    Delegates to ``run_compose`` with ``exec -T <service> <command...>``.

    Args:
        env_name: Environment name (gmail, slack, etc.)
        compose_file: Path to docker-compose.yml
        service: Service name to exec into
        command: Command to run
        cwd: Working directory
        check: Raise exception on non-zero exit
        verbose: Print command before running

    Returns:
        CompletedProcess instance
    """
    exec_prefix = ["exec", "-T", service]
    return run_compose(
        env_name,
        compose_file,
        exec_prefix + command,
        cwd=cwd,
        check=check,
        verbose=verbose,
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def run_compose_cp(
    env_name: str,
    compose_file: Union[str, Path],
    src: str,
    dst: str,
    cwd: Optional[Union[str, Path]] = None,
    check: bool = True,
    verbose: bool = True,
) -> subprocess.CompletedProcess:
    """
    Copy files to or from a docker compose service.

    Delegates to ``run_compose`` with ``cp <src> <dst>``.

    Args:
        env_name: Environment name (gmail, slack, etc.)
        compose_file: Path to docker-compose.yml
        src: Source path (local or service:path)
        dst: Destination path (local or service:path)
        cwd: Working directory
        check: Raise exception on non-zero exit
        verbose: Print command before running

    Returns:
        CompletedProcess instance
    """
    copy_args = ["cp", src, dst]
    return run_compose(
        env_name,
        compose_file,
        copy_args,
        cwd=cwd,
        check=check,
        verbose=verbose,
    )
|
|
135
|
+
|
utils/config.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import importlib.resources
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _prepend_to_sys_path(directory: Path) -> Path:
    """Put ``directory`` at the front of ``sys.path`` unless already listed; return it."""
    entry = str(directory)
    if entry not in sys.path:
        sys.path.insert(0, entry)
    return directory


def _find_project_root() -> Path:
    """Locate the project root and make sure it is on ``sys.path``.

    Strategies are tried in order; the first that yields a directory wins:
      1. The ``DTAP_ROOT`` environment variable (explicit override).
      2. The directory containing the importable ``dt_arena`` package.
      3. The nearest ancestor of this file that contains ``pyproject.toml``.
      4. The parent of this file's directory, as a last resort.

    Returns:
        The chosen root directory.
    """
    # 1. Explicit override — set DTAP_ROOT when running from Docker / CI / installed wheel.
    override = os.environ.get("DTAP_ROOT")
    if override:
        return _prepend_to_sys_path(Path(override).resolve())

    # 2. Installed package: locate via the bundled dt_arena/config/env.yaml.
    #    Any failure here just falls through to the next strategy.
    try:
        env_yaml = importlib.resources.files("dt_arena") / "config" / "env.yaml"
        with importlib.resources.as_file(env_yaml) as located:
            # located = <prefix>/dt_arena/config/env.yaml → parents[2] = <prefix>
            return _prepend_to_sys_path(located.parents[2])
    except Exception:
        pass

    # 3. Source checkout: walk up until we find pyproject.toml.
    this_file = Path(__file__).resolve()
    for ancestor in this_file.parents:
        if (ancestor / "pyproject.toml").exists():
            return _prepend_to_sys_path(ancestor)

    # 4. Last resort.
    return _prepend_to_sys_path(this_file.parents[1])
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Resolved once at import time. Note that _find_project_root() also prepends
# the chosen directory to sys.path if it is not already listed there.
PROJECT_ROOT = _find_project_root()

# Configuration file paths (bundled as package-data in the wheel)
ENV_CONFIG_PATH = PROJECT_ROOT / "dt_arena" / "config" / "env.yaml"
MCP_CONFIG_PATH = PROJECT_ROOT / "dt_arena" / "config" / "mcp.yaml"
INJECTION_MCP_CONFIG_PATH = PROJECT_ROOT / "dt_arena" / "config" / "injection_mcp.yaml"

# Bundled benchmark task lists (benchmark/<domain>/{benign,direct,indirect}.jsonl)
BENCHMARK_ROOT = PROJECT_ROOT / "benchmark"

# Evaluation script path
TASK_RUNNER_PATH = PROJECT_ROOT / "eval" / "task_runner.py"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def resolve_benchmark_task_list(
    domain: str | None = None,
    task_type: str | None = None,
    threat_model: str | None = None,
) -> Path:
    """Pick the bundled benchmark path (directory or JSONL) for the given filters.

    - No domain                  → BENCHMARK_ROOT
    - domain only                → BENCHMARK_ROOT/<domain>/
    - task_type == "benign"      → BENCHMARK_ROOT/<domain>/benign.jsonl
    - threat_model direct/indirect → BENCHMARK_ROOT/<domain>/<threat_model>.jsonl

    ``task_type == "benign"`` takes precedence over ``threat_model``. Any other
    combination falls back to the domain directory. The returned path is not
    checked for existence.
    """
    if not domain:
        return BENCHMARK_ROOT

    domain_root = BENCHMARK_ROOT / domain
    if task_type == "benign":
        list_name = "benign.jsonl"
    elif threat_model in ("direct", "indirect"):
        list_name = f"{threat_model}.jsonl"
    else:
        return domain_root
    return domain_root / list_name
|
|
77
|
+
|