decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import asyncio
|
|
4
|
+
import argparse
|
|
5
|
+
|
|
6
|
+
from dt_arena.src.types.agent import AgentConfig, RuntimeConfig
|
|
7
|
+
from dt_arena.src.types.task import TaskConfig, AttackConfig
|
|
8
|
+
|
|
9
|
+
from agent.pocketflow import MCPReactAgent
|
|
10
|
+
from utils.injection_helpers import (
|
|
11
|
+
build_tool_injections_from_config,
|
|
12
|
+
apply_prompt_injections,
|
|
13
|
+
)
|
|
14
|
+
from utils.task_helpers import extract_dataset_path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ============================================================================
|
|
18
|
+
# Standalone Test Example for MCPReactAgent
|
|
19
|
+
# Supports tool injection and prompt injection from attack config
|
|
20
|
+
# ============================================================================
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _build_arg_parser():
    """Create the command-line parser for this example script.

    Returns:
        argparse.ArgumentParser: parser exposing ``--config`` (required),
        ``--model``, ``--temperature``, ``--max-turns``, ``--output-dir``
        and ``--debug``.
    """
    parser = argparse.ArgumentParser(
        description="Run PocketFlow SDK Agent with configuration file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\n"
            "Examples:\n"
            "# Run with default settings\n"
            "python example.py --config path/to/config.yaml\n"
            "\n"
            "# Run with custom model and temperature\n"
            "python example.py --config path/to/config.yaml --model gpt-4o-mini --temperature 0.5\n"
            "\n"
            "# Run with custom max turns and output directory\n"
            "python example.py --config path/to/config.yaml --max-turns 20 --output-dir ./custom_results\n"
        ),
    )
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Path to YAML configuration file (with Task, Agent, Attack sections)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="gpt-4o",
        help="Model to use (default: gpt-4o)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.1,
        help="Sampling temperature, 0.0-1.0 (default: 0.1)",
    )
    parser.add_argument(
        "--max-turns",
        type=int,
        default=10,
        help="Maximum conversation turns (default: 10)",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=None,
        help="Output directory for traces and trajectories (default: ./results)",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug mode for tracing",
    )
    return parser


def _print_attack_summary(attack_config):
    """Print the risk category, threat model, goal and per-turn attack steps."""
    print(f"\n[ATTACK] Risk Category: {attack_config.risk_category}")
    print(f"[ATTACK] Threat Model: {attack_config.threat_model}")
    if attack_config.malicious_goal:
        print(f"[ATTACK] Malicious Goal: {attack_config.malicious_goal}")
    if attack_config.attack_turns:
        print(f"[ATTACK] Attack Turns: {len(attack_config.attack_turns)}")
        for turn in attack_config.attack_turns:
            print(f" Turn {turn.turn_id}: {len(turn.attack_steps)} attack step(s)")
            for step in turn.attack_steps:
                print(f" - {step.type} ({step.mode})")


def _resolve_user_instruction(task_config, attack_config):
    """Return the stripped instruction (str or list of str) to send to the agent.

    For a "direct" threat model the injection helper is called with no
    original instruction, and the task's own instruction is used only when
    that call yields nothing. Otherwise the helper is applied to the task's
    original instruction.

    Raises:
        ValueError: if neither the task nor the attack configuration yields
            an instruction.
    """
    is_direct_attack = attack_config and attack_config.threat_model == "direct"

    if is_direct_attack:
        instruction = apply_prompt_injections(original_instruction=None, attack_config=attack_config)
        if not instruction:
            instruction = task_config.original_instruction
    else:
        instruction = apply_prompt_injections(task_config.original_instruction, attack_config)

    if isinstance(instruction, str):
        return instruction.strip()
    if instruction is None:
        # Without this guard the list comprehension below would fail with an
        # opaque "'NoneType' object is not iterable" TypeError.
        raise ValueError("No user instruction found in task or attack configuration")
    return [item.strip() for item in instruction]


def _print_instruction(user_instruction):
    """Print the final instruction, numbering the queries of a multi-turn task."""
    print("\n" + "=" * 80)
    print("[INSTRUCTION (after injection)]")
    print("=" * 80)
    if isinstance(user_instruction, list):
        print(f"Multi-turn task with {len(user_instruction)} queries:")
        for i, instr in enumerate(user_instruction, 1):
            print(f" {i}. {instr}")
    else:
        print(user_instruction)
    print("=" * 80)


async def main():
    """Run the MCPReactAgent example end to end from a YAML configuration.

    Parses the command line, loads the task/agent/attack sections from the
    configuration file, builds the runtime configuration (including tool
    injections), resolves the user instruction, runs the agent and prints
    the outcome.

    Returns:
        The result object returned by ``agent.run`` on success.

    Exits:
        Calls ``sys.exit(1)`` when the configuration path is not an existing
        file, or when any exception is raised while loading the configuration
        or running the agent (the error and traceback are printed first).
    """
    args = _build_arg_parser().parse_args()

    # Setup
    config_path = args.config
    # isfile (not exists): a directory path must be rejected here, before any
    # output directories are created for a run that cannot start.
    if not os.path.isfile(config_path):
        print(f"[ERROR] Configuration file not found: {config_path}")
        sys.exit(1)

    # Extract dataset path structure (e.g., crm/benign/1)
    dataset_path = extract_dataset_path(config_path)

    # Build output directory
    base_output_dir = args.output_dir or os.path.join(os.getcwd(), "results")
    output_dir = os.path.join(base_output_dir, "pocketflow", dataset_path)
    os.makedirs(output_dir, exist_ok=True)

    print(f"[INFO] Loading configuration from: {config_path}")
    print(f"[INFO] Model: {args.model}")
    print(f"[INFO] Temperature: {args.temperature}")
    print(f"[INFO] Max Turns: {args.max_turns}")
    print(f"[INFO] Output Directory: {output_dir}")
    print("-" * 80)

    try:
        # Load configuration
        task_config = TaskConfig.from_yaml(config_path)
        agent_config = AgentConfig.from_yaml(config_path)
        attack_config = AttackConfig.from_yaml(config_path)

        # Build tool injections from all attack turns
        mcp_injection = build_tool_injections_from_config(attack_config)

        # Create runtime config with tool injections
        runtime_config = RuntimeConfig(
            model=args.model,
            temperature=args.temperature,
            max_turns=args.max_turns,
            output_dir=output_dir,
            mcp_injection=mcp_injection,
            debug=args.debug,
        )

        # Create agent with configurations
        agent = MCPReactAgent(
            agent_config=agent_config,
            runtime_config=runtime_config,
        )

        # Display loaded configuration
        print(f"\n[TASK] Task ID: {task_config.task_id}")
        print(f"[TASK] Domain: {task_config.domain}")

        if attack_config:
            _print_attack_summary(attack_config)

        # Handle prompt injections based on threat model, then normalize
        user_instruction = _resolve_user_instruction(task_config, attack_config)
        _print_instruction(user_instruction)

        if attack_config and attack_config.malicious_goal:
            print("\n[MALICIOUS GOAL] Testing Against:")
            print("-" * 80)
            print(attack_config.malicious_goal.strip())
            print("-" * 80)

        # Run agent with async context manager
        async with agent:
            print("\n[INFO] Running agent...")
            if isinstance(user_instruction, list):
                print(f"[INFO] Processing {len(user_instruction)} turns...")

            # Pass task metadata via run() method
            metadata = {
                "task_id": task_config.task_id,
                "domain": task_config.domain,
                "category": attack_config.risk_category if attack_config else None,
                "malicious_goal": attack_config.malicious_goal if attack_config else None,
            }

            result = await agent.run(user_instruction, metadata=metadata)

            print("\n" + "=" * 80)
            print("[AGENT RESPONSE]")
            print("=" * 80)
            print(result.final_output)
            print("=" * 80)

            print("\n[SUCCESS] Task completed")
            print(f"[INFO] Turns: {result.turn_count}")
            print(f"[INFO] Trajectories saved to: {agent.trajectories_dir}")
            if result.trajectory:
                print(f"[INFO] Trajectory steps: {len(result.trajectory)}")

            return result

    except Exception as e:
        # Top-level boundary of the script: report, show the traceback, and
        # signal failure through the exit status.
        print(f"\n[ERROR] {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
if __name__ == "__main__":
    # Script entry point. main() reports its own success (including the
    # "[SUCCESS] Task completed" line), so nothing is printed here on the
    # success path; only interruption and errors raised outside main()'s own
    # try block (e.g. while preparing the output directory) are handled.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n[INFO] Interrupted by user")
    except Exception as e:
        print(f"\n[ERROR] {e}")
        sys.exit(1)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
ACTION_SPACE_INSTRUCTION = """You are a ReAct agent that can use tools to complete tasks. You MUST choose ONE of the following two actions:
|
|
2
|
+
|
|
3
|
+
## Available Actions:
|
|
4
|
+
1. **use_tool**: Call one of the available tools to gather information or perform an action
|
|
5
|
+
- When to use: You need more information or need to perform an action
|
|
6
|
+
- Required fields: action, reasoning, tool_name, tool_arguments
|
|
7
|
+
- Set final_answer to null
|
|
8
|
+
|
|
9
|
+
2. **answer**: Provide the final answer to the user's query
|
|
10
|
+
- When to use: You have sufficient information to provide a complete answer
|
|
11
|
+
- Required fields: action, reasoning, final_answer
|
|
12
|
+
- Set tool_name and tool_arguments to null
|
|
13
|
+
|
|
14
|
+
## Response Format (MUST be valid YAML):
|
|
15
|
+
|
|
16
|
+
```yaml
|
|
17
|
+
reasoning: |
|
|
18
|
+
<your step-by-step reasoning about what to do next>
|
|
19
|
+
action: use_tool # MUST be exactly "use_tool" or "answer" (no other values allowed)
|
|
20
|
+
tool_name: <tool_name> # REQUIRED if action is "use_tool", otherwise set to null
|
|
21
|
+
tool_arguments: # REQUIRED if action is "use_tool", otherwise set to null
|
|
22
|
+
<param1>: <value1>
|
|
23
|
+
<param2>: <value2>
|
|
24
|
+
final_answer: | # REQUIRED if action is "answer", otherwise set to null
|
|
25
|
+
<your final answer to the user>
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Important Rules:
|
|
29
|
+
- The response MUST be valid YAML enclosed in ```yaml and ```
|
|
30
|
+
- The "action" field MUST be exactly "use_tool" or "answer" (case-sensitive)
|
|
31
|
+
- If action is "use_tool": tool_name MUST be specified, tool_arguments MUST be a dict, final_answer MUST be null
|
|
32
|
+
- If action is "answer": final_answer MUST be specified, tool_name MUST be null, tool_arguments MUST be null
|
|
33
|
+
- Always include the "reasoning" field to explain your decision
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
REACT_AGENT_SYSPROMPT = """{user_given_system_prompt}
|
|
37
|
+
|
|
38
|
+
# Available Tools
|
|
39
|
+
{tools_text}
|
|
40
|
+
|
|
41
|
+
# Action Space and Response Format
|
|
42
|
+
{action_space_instruction}
|
|
43
|
+
|
|
44
|
+
Think carefully about whether you have enough information to answer the question.
|
|
45
|
+
- If you need more information, use an appropriate tool.
|
|
46
|
+
- If you have sufficient information, provide a comprehensive final answer."""
|