decodingtrust-agent-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +30 -0
- agent/claudesdk/__init__.py +8 -0
- agent/claudesdk/example.py +221 -0
- agent/claudesdk/src/__init__.py +8 -0
- agent/claudesdk/src/agent.py +400 -0
- agent/claudesdk/src/mcp_proxy.py +409 -0
- agent/claudesdk/src/utils.py +420 -0
- agent/googleadk/__init__.py +15 -0
- agent/googleadk/example.py +237 -0
- agent/googleadk/src/__init__.py +12 -0
- agent/googleadk/src/agent.py +401 -0
- agent/googleadk/src/mcp_wrapper.py +163 -0
- agent/googleadk/src/utils.py +602 -0
- agent/langchain/__init__.py +8 -0
- agent/langchain/example.py +213 -0
- agent/langchain/src/__init__.py +8 -0
- agent/langchain/src/agent.py +645 -0
- agent/langchain/src/utils.py +433 -0
- agent/openaisdk/__init__.py +17 -0
- agent/openaisdk/example.py +228 -0
- agent/openaisdk/src/__init__.py +12 -0
- agent/openaisdk/src/agent.py +491 -0
- agent/openaisdk/src/agent_wrapper.py +143 -0
- agent/openaisdk/src/mcp_wrapper.py +395 -0
- agent/openaisdk/src/utils.py +493 -0
- agent/openclaw/__init__.py +10 -0
- agent/openclaw/example.py +251 -0
- agent/openclaw/src/__init__.py +14 -0
- agent/openclaw/src/agent.py +930 -0
- agent/openclaw/src/helpers/__init__.py +1 -0
- agent/openclaw/src/helpers/auth_helpers.py +55 -0
- agent/openclaw/src/mcp_proxy.py +564 -0
- agent/openclaw/src/plugin_generator.py +231 -0
- agent/openclaw/src/utils.py +341 -0
- agent/pocketflow/__init__.py +18 -0
- agent/pocketflow/example.py +221 -0
- agent/pocketflow/prompts/react_agent.py +46 -0
- agent/pocketflow/src/__init__.py +6 -0
- agent/pocketflow/src/agent.py +507 -0
- agent/pocketflow/src/agent_wrapper.py +159 -0
- agent/pocketflow/src/async_helper.py +92 -0
- agent/pocketflow/src/mcp_react_agent.py +279 -0
- agent/pocketflow/src/native_agent.py +74 -0
- agent/pocketflow/src/nodes.py +467 -0
- benchmark/__init__.py +0 -0
- benchmark/browser/benign.jsonl +34 -0
- benchmark/browser/direct.jsonl +85 -0
- benchmark/browser/indirect.jsonl +82 -0
- benchmark/code/benign.jsonl +0 -0
- benchmark/code/direct.jsonl +121 -0
- benchmark/code/indirect.jsonl +165 -0
- benchmark/crm/benign.jsonl +165 -0
- benchmark/crm/direct.jsonl +90 -0
- benchmark/crm/indirect.jsonl +150 -0
- benchmark/customer-service/benign.jsonl +160 -0
- benchmark/customer-service/direct.jsonl +100 -0
- benchmark/customer-service/indirect.jsonl +101 -0
- benchmark/finance/benign.jsonl +0 -0
- benchmark/finance/direct.jsonl +200 -0
- benchmark/finance/indirect.jsonl +200 -0
- benchmark/legal/benign.jsonl +0 -0
- benchmark/legal/direct.jsonl +200 -0
- benchmark/legal/indirect.jsonl +200 -0
- benchmark/macos/benign.jsonl +30 -0
- benchmark/macos/direct.jsonl +50 -0
- benchmark/macos/indirect.jsonl +50 -0
- benchmark/medical/benign.jsonl +642 -0
- benchmark/medical/direct.jsonl +229 -0
- benchmark/medical/indirect.jsonl +222 -0
- benchmark/os-filesystem/benign.jsonl +200 -0
- benchmark/os-filesystem/direct.jsonl +200 -0
- benchmark/os-filesystem/indirect.jsonl +200 -0
- benchmark/research/benign.jsonl +0 -0
- benchmark/research/direct.jsonl +119 -0
- benchmark/research/indirect.jsonl +125 -0
- benchmark/telecom/benign.jsonl +120 -0
- benchmark/telecom/direct.jsonl +161 -0
- benchmark/telecom/indirect.jsonl +166 -0
- benchmark/travel/benign.jsonl +130 -0
- benchmark/travel/direct.jsonl +105 -0
- benchmark/travel/indirect.jsonl +120 -0
- benchmark/windows/benign.jsonl +100 -0
- benchmark/windows/direct.jsonl +140 -0
- benchmark/windows/indirect.jsonl +107 -0
- benchmark/workflow/benign.jsonl +335 -0
- benchmark/workflow/direct.jsonl +78 -0
- benchmark/workflow/indirect.jsonl +107 -0
- cli/__init__.py +5 -0
- cli/main.py +182 -0
- cli/scaffold.py +334 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/METADATA +642 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/RECORD +374 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/WHEEL +5 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
- decodingtrust_agent_sdk-0.1.0.dist-info/top_level.txt +6 -0
- dt_arena/config/env.yaml +515 -0
- dt_arena/config/injection_mcp.yaml +430 -0
- dt_arena/config/mcp.yaml +642 -0
- dt_arena/envs/arxiv/docker-compose-hub.yml +31 -0
- dt_arena/envs/arxiv/docker-compose.yml +36 -0
- dt_arena/envs/atlassian/docker/docker-compose.dev.yml +65 -0
- dt_arena/envs/atlassian/docker/docker-compose.yml +53 -0
- dt_arena/envs/atlassian/docker-compose-hub.yml +57 -0
- dt_arena/envs/atlassian/docker-compose.yml +72 -0
- dt_arena/envs/bigquery/docker-compose.yml +20 -0
- dt_arena/envs/booking/docker-compose.yml +59 -0
- dt_arena/envs/calendar/docker-compose-hub.yml +30 -0
- dt_arena/envs/calendar/docker-compose.yml +42 -0
- dt_arena/envs/custom-website/docker-compose.yml +6 -0
- dt_arena/envs/customer_service/docker-compose.yml +59 -0
- dt_arena/envs/databricks/docker-compose-hub.yml +47 -0
- dt_arena/envs/databricks/docker-compose.yml +51 -0
- dt_arena/envs/ecommerce/docker-compose.yml +6 -0
- dt_arena/envs/ers/docker-compose.yml +36 -0
- dt_arena/envs/ers/hrms/docker/docker-compose.yml +31 -0
- dt_arena/envs/finance/docker-compose.yml +23 -0
- dt_arena/envs/github/docker/docker-compose-hub.yml +50 -0
- dt_arena/envs/github/docker/docker-compose.yml +50 -0
- dt_arena/envs/gmail/docker-compose-hub.yml +51 -0
- dt_arena/envs/gmail/docker-compose.yml +65 -0
- dt_arena/envs/google-form/docker-compose-hub.yml +33 -0
- dt_arena/envs/google-form/docker-compose.yml +41 -0
- dt_arena/envs/googledocs/docker-compose-hub.yml +61 -0
- dt_arena/envs/googledocs/docker-compose.yml +78 -0
- dt_arena/envs/hospital/docker-compose-hub.yml +25 -0
- dt_arena/envs/hospital/docker-compose.yml +27 -0
- dt_arena/envs/legal/docker-compose.yml +22 -0
- dt_arena/envs/linkedin/docker-compose.yml +63 -0
- dt_arena/envs/macos/docker-compose.yml +79 -0
- dt_arena/envs/os-filesystem/docker-compose-hub.yml +16 -0
- dt_arena/envs/os-filesystem/docker-compose.yml +20 -0
- dt_arena/envs/paypal/docker-compose-hub.yml +48 -0
- dt_arena/envs/paypal/docker-compose.yml +63 -0
- dt_arena/envs/research/docker-compose-hub.yml +13 -0
- dt_arena/envs/research/docker-compose.yml +24 -0
- dt_arena/envs/salesforce_crm/docker-compose-hub.yaml +45 -0
- dt_arena/envs/salesforce_crm/docker-compose.yaml +49 -0
- dt_arena/envs/slack/docker-compose-hub.yml +28 -0
- dt_arena/envs/slack/docker-compose.yml +41 -0
- dt_arena/envs/snowflake/docker-compose-hub.yml +41 -0
- dt_arena/envs/snowflake/docker-compose.yml +44 -0
- dt_arena/envs/telecom/docker-compose-hub.yml +16 -0
- dt_arena/envs/telecom/docker-compose.yml +17 -0
- dt_arena/envs/telegram/docker-compose-hub.yml +57 -0
- dt_arena/envs/telegram/docker-compose.yml +62 -0
- dt_arena/envs/terminal/docker-compose-hub.yml +12 -0
- dt_arena/envs/terminal/docker-compose.yml +26 -0
- dt_arena/envs/travel/docker-compose-hub.yml +19 -0
- dt_arena/envs/travel/docker-compose.yml +19 -0
- dt_arena/envs/whatsapp/docker-compose-hub.yml +61 -0
- dt_arena/envs/whatsapp/docker-compose.yml +78 -0
- dt_arena/envs/windows/docker-compose.yml +71 -0
- dt_arena/envs/zoom/docker-compose-hub.yml +27 -0
- dt_arena/envs/zoom/docker-compose.yml +40 -0
- dt_arena/injection_mcp_server/atlassian/env_injection.py +134 -0
- dt_arena/injection_mcp_server/calendar/env_injection.py +217 -0
- dt_arena/injection_mcp_server/custom_website/env_injection.py +97 -0
- dt_arena/injection_mcp_server/customer_service/env_injection.py +659 -0
- dt_arena/injection_mcp_server/databricks/env_injection.py +255 -0
- dt_arena/injection_mcp_server/ecommerce/env_injection.py +110 -0
- dt_arena/injection_mcp_server/finance/env_injection.py +85 -0
- dt_arena/injection_mcp_server/github/env_injection.py +206 -0
- dt_arena/injection_mcp_server/gmail/env_injection.py +211 -0
- dt_arena/injection_mcp_server/google_form/env_injection.py +186 -0
- dt_arena/injection_mcp_server/googledocs/env_injection.py +44 -0
- dt_arena/injection_mcp_server/hospital/env_injection.py +43 -0
- dt_arena/injection_mcp_server/legal/env_injection.py +229 -0
- dt_arena/injection_mcp_server/macos/env_injection.py +272 -0
- dt_arena/injection_mcp_server/os-filesystem/env_injection.py +341 -0
- dt_arena/injection_mcp_server/paypal/env_injection.py +268 -0
- dt_arena/injection_mcp_server/research/env_injection.py +616 -0
- dt_arena/injection_mcp_server/salesforce/env_injection.py +514 -0
- dt_arena/injection_mcp_server/slack/env_injection.py +265 -0
- dt_arena/injection_mcp_server/snowflake/env_injection.py +230 -0
- dt_arena/injection_mcp_server/telecom/env_injection.py +503 -0
- dt_arena/injection_mcp_server/telegram/env_injection.py +171 -0
- dt_arena/injection_mcp_server/terminal/env_injection.py +523 -0
- dt_arena/injection_mcp_server/travel/env_injection.py +173 -0
- dt_arena/injection_mcp_server/whatsapp/env_injection.py +185 -0
- dt_arena/injection_mcp_server/windows/env_injection.py +943 -0
- dt_arena/injection_mcp_server/zoom/env_injection.py +216 -0
- dt_arena/mcp_server/atlassian/main.py +1554 -0
- dt_arena/mcp_server/atlassian/test_server.py +66 -0
- dt_arena/mcp_server/bigquery/main.py +333 -0
- dt_arena/mcp_server/booking/main.py +310 -0
- dt_arena/mcp_server/browser/main.py +1741 -0
- dt_arena/mcp_server/calendar/example_multi_user.py +162 -0
- dt_arena/mcp_server/calendar/main.py +792 -0
- dt_arena/mcp_server/calendar/test_mcp.py +135 -0
- dt_arena/mcp_server/customer_service/main.py +1063 -0
- dt_arena/mcp_server/databricks/main.py +566 -0
- dt_arena/mcp_server/databricks/probe.py +102 -0
- dt_arena/mcp_server/ers/main.py +845 -0
- dt_arena/mcp_server/finance/__init__.py +87 -0
- dt_arena/mcp_server/finance/core/__init__.py +12 -0
- dt_arena/mcp_server/finance/core/data_loader.py +558 -0
- dt_arena/mcp_server/finance/core/portfolio.py +565 -0
- dt_arena/mcp_server/finance/evaluation/__init__.py +20 -0
- dt_arena/mcp_server/finance/evaluation/evaluator.py +217 -0
- dt_arena/mcp_server/finance/evaluation/logger.py +137 -0
- dt_arena/mcp_server/finance/injection/__init__.py +66 -0
- dt_arena/mcp_server/finance/injection/config.py +176 -0
- dt_arena/mcp_server/finance/injection/content.py +755 -0
- dt_arena/mcp_server/finance/injection/html.py +409 -0
- dt_arena/mcp_server/finance/injection/locations.py +167 -0
- dt_arena/mcp_server/finance/injection/methods.py +193 -0
- dt_arena/mcp_server/finance/injection/presets.py +1023 -0
- dt_arena/mcp_server/finance/main.py +361 -0
- dt_arena/mcp_server/finance/run_mcp.py +21 -0
- dt_arena/mcp_server/finance/run_web.py +26 -0
- dt_arena/mcp_server/finance/server/__init__.py +41 -0
- dt_arena/mcp_server/finance/server/extractor.py +1453 -0
- dt_arena/mcp_server/finance/server/extractor_minimal.py +292 -0
- dt_arena/mcp_server/finance/server/extractor_simple.py +1164 -0
- dt_arena/mcp_server/finance/server/injection_mcp.py +865 -0
- dt_arena/mcp_server/finance/server/mcp.py +451 -0
- dt_arena/mcp_server/finance/server/tools/__init__.py +23 -0
- dt_arena/mcp_server/finance/server/tools/account.py +88 -0
- dt_arena/mcp_server/finance/server/tools/browsing.py +328 -0
- dt_arena/mcp_server/finance/server/tools/social.py +73 -0
- dt_arena/mcp_server/finance/server/tools/trading.py +242 -0
- dt_arena/mcp_server/finance/server/tools/utility.py +49 -0
- dt_arena/mcp_server/finance/server/web.py +2139 -0
- dt_arena/mcp_server/finance/tasks/benchmark/__init__.py +28 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_pool.py +3026 -0
- dt_arena/mcp_server/finance/tasks/benchmark/attack_runner.py +1315 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_requirements.py +1335 -0
- dt_arena/mcp_server/finance/tasks/benchmark/finra_tasks.py +3665 -0
- dt_arena/mcp_server/finance/tasks/benchmark/malicious_tasks.py +2673 -0
- dt_arena/mcp_server/finance/tasks/redteam_suite/run_redteam_suite.py +1713 -0
- dt_arena/mcp_server/finance/test_mcp_tools.py +476 -0
- dt_arena/mcp_server/github/main.py +441 -0
- dt_arena/mcp_server/gmail/main.py +1004 -0
- dt_arena/mcp_server/google_form/main.py +141 -0
- dt_arena/mcp_server/googledocs/main.py +458 -0
- dt_arena/mcp_server/hospital/mcp_server.py +458 -0
- dt_arena/mcp_server/legal/__init__.py +9 -0
- dt_arena/mcp_server/legal/core/__init__.py +14 -0
- dt_arena/mcp_server/legal/core/courtlistener_store.py +762 -0
- dt_arena/mcp_server/legal/core/data_loader.py +266 -0
- dt_arena/mcp_server/legal/core/document_store.py +197 -0
- dt_arena/mcp_server/legal/core/matter_manager.py +466 -0
- dt_arena/mcp_server/legal/main.py +89 -0
- dt_arena/mcp_server/legal/scripts/collect_data.py +988 -0
- dt_arena/mcp_server/legal/server/__init__.py +14 -0
- dt_arena/mcp_server/legal/server/mcp.py +2330 -0
- dt_arena/mcp_server/macos/client_test.py +270 -0
- dt_arena/mcp_server/macos/mcp_server.py +285 -0
- dt_arena/mcp_server/os-filesystem/main.py +1380 -0
- dt_arena/mcp_server/paypal/main.py +501 -0
- dt_arena/mcp_server/research/main.py +777 -0
- dt_arena/mcp_server/salesforce/main.py +2006 -0
- dt_arena/mcp_server/slack/main.py +318 -0
- dt_arena/mcp_server/snowflake/main.py +612 -0
- dt_arena/mcp_server/snowflake/probe.py +183 -0
- dt_arena/mcp_server/telecom/mcp_client.py +423 -0
- dt_arena/mcp_server/telecom/mcp_server.py +1059 -0
- dt_arena/mcp_server/telegram/main.py +338 -0
- dt_arena/mcp_server/terminal/main.py +163 -0
- dt_arena/mcp_server/travel/client_test.py +16 -0
- dt_arena/mcp_server/travel/mcp_server.py +404 -0
- dt_arena/mcp_server/whatsapp/main.py +318 -0
- dt_arena/mcp_server/windows/client_test.py +270 -0
- dt_arena/mcp_server/windows/mcp_server.py +218 -0
- dt_arena/mcp_server/zoom/main.py +466 -0
- dt_arena/src/__init__.py +0 -0
- dt_arena/src/hooks/__init__.py +0 -0
- dt_arena/src/hooks/audit_log.py +30 -0
- dt_arena/src/hooks/hooks.json +3 -0
- dt_arena/src/run_benign.py +142 -0
- dt_arena/src/types/__init__.py +0 -0
- dt_arena/src/types/agent.py +441 -0
- dt_arena/src/types/attacks.py +2 -0
- dt_arena/src/types/environment.py +2 -0
- dt_arena/src/types/hooks.py +174 -0
- dt_arena/src/types/judge.py +52 -0
- dt_arena/src/types/red_teaming_trajectory.py +385 -0
- dt_arena/src/types/task.py +260 -0
- dt_arena/src/types/trajectory.py +315 -0
- dt_arena/utils/__init__.py +1 -0
- dt_arena/utils/atlassian/__init__.py +27 -0
- dt_arena/utils/atlassian/helpers.py +520 -0
- dt_arena/utils/bigquery/__init__.py +1 -0
- dt_arena/utils/bigquery/helpers.py +246 -0
- dt_arena/utils/calendar/__init__.py +1 -0
- dt_arena/utils/calendar/helpers.py +87 -0
- dt_arena/utils/customer_service/__init__.py +17 -0
- dt_arena/utils/customer_service/cs_env_client.py +940 -0
- dt_arena/utils/customer_service/helpers.py +339 -0
- dt_arena/utils/customer_service/judges/__init__.py +20 -0
- dt_arena/utils/customer_service/judges/abusive_comms.py +149 -0
- dt_arena/utils/customer_service/judges/bulk_destructive.py +202 -0
- dt_arena/utils/customer_service/judges/cross_customer.py +481 -0
- dt_arena/utils/customer_service/judges/email_exfiltration.py +320 -0
- dt_arena/utils/customer_service/judges/financial_fraud.py +221 -0
- dt_arena/utils/customer_service/judges/llm_utils.py +264 -0
- dt_arena/utils/customer_service/judges/pii_disclosure.py +837 -0
- dt_arena/utils/customer_service/judges/policy_misrep.py +44 -0
- dt_arena/utils/customer_service/judges/text_utils.py +21 -0
- dt_arena/utils/databricks/__init__.py +2 -0
- dt_arena/utils/databricks/helpers.py +210 -0
- dt_arena/utils/finance/__init__.py +0 -0
- dt_arena/utils/finance/helpers.py +263 -0
- dt_arena/utils/github/__init__.py +1 -0
- dt_arena/utils/github/helpers.py +249 -0
- dt_arena/utils/gmail/__init__.py +1 -0
- dt_arena/utils/gmail/helpers.py +344 -0
- dt_arena/utils/google_form/__init__.py +2 -0
- dt_arena/utils/google_form/helpers.py +133 -0
- dt_arena/utils/legal/__init__.py +0 -0
- dt_arena/utils/legal/helpers.py +228 -0
- dt_arena/utils/macos/__init__.py +0 -0
- dt_arena/utils/macos/env_setup.py +215 -0
- dt_arena/utils/macos/helpers.py +61 -0
- dt_arena/utils/os_filesystem/__init__.py +1 -0
- dt_arena/utils/os_filesystem/helpers.py +366 -0
- dt_arena/utils/paypal/__init__.py +1 -0
- dt_arena/utils/paypal/helpers.py +178 -0
- dt_arena/utils/port_allocator.py +266 -0
- dt_arena/utils/research/__init__.py +0 -0
- dt_arena/utils/research/helpers.py +251 -0
- dt_arena/utils/salesforce/__init__.py +1 -0
- dt_arena/utils/salesforce/helpers.py +719 -0
- dt_arena/utils/slack/__init__.py +1 -0
- dt_arena/utils/slack/helpers.py +176 -0
- dt_arena/utils/snowflake/__init__.py +1 -0
- dt_arena/utils/snowflake/helpers.py +166 -0
- dt_arena/utils/telecom/__init__.py +1 -0
- dt_arena/utils/telecom/helpers.py +760 -0
- dt_arena/utils/telegram/__init__.py +0 -0
- dt_arena/utils/telegram/helpers.py +174 -0
- dt_arena/utils/terminal/__init__.py +0 -0
- dt_arena/utils/terminal/helpers.py +20 -0
- dt_arena/utils/travel/__init__.py +0 -0
- dt_arena/utils/travel/env_client.py +537 -0
- dt_arena/utils/travel/llm_judge.py +137 -0
- dt_arena/utils/travel/prompts.py +64 -0
- dt_arena/utils/utils/__init__.py +122 -0
- dt_arena/utils/whatsapp/__init__.py +0 -0
- dt_arena/utils/whatsapp/helpers.py +226 -0
- dt_arena/utils/windows/__init__.py +0 -0
- dt_arena/utils/windows/env_reset.py +224 -0
- dt_arena/utils/windows/env_setup.py +280 -0
- dt_arena/utils/windows/exfil_helpers.py +170 -0
- dt_arena/utils/windows/helpers.py +74 -0
- dt_arena/utils/zoom/__init__.py +1 -0
- dt_arena/utils/zoom/helpers.py +70 -0
- eval/__init__.py +1 -0
- eval/evaluation.py +426 -0
- eval/task_runner.py +449 -0
- utils/__init__.py +148 -0
- utils/agent_helpers.py +308 -0
- utils/agent_wrapper.py +189 -0
- utils/compose_utils.py +135 -0
- utils/config.py +77 -0
- utils/env_helpers.py +104 -0
- utils/eval_stats.py +88 -0
- utils/injection_helpers.py +429 -0
- utils/injection_mcp_helpers.py +152 -0
- utils/judge_helpers.py +181 -0
- utils/judge_utils.py +472 -0
- utils/llm.py +196 -0
- utils/logging.py +45 -0
- utils/mcp_helpers.py +232 -0
- utils/mcp_manager.py +235 -0
- utils/memory_guard.py +18 -0
- utils/red_teaming_sandbox.py +476 -0
- utils/reset_helpers.py +318 -0
- utils/resource_manager.py +370 -0
- utils/skill_helpers.py +447 -0
- utils/task_executor.py +904 -0
- utils/task_helpers.py +270 -0
- utils/template_helpers.py +179 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
from typing import Dict, Any, List, Optional
|
|
3
|
+
from pocketflow import Node
|
|
4
|
+
from utils.llm import call_llm
|
|
5
|
+
from agent.pocketflow.prompts.react_agent import REACT_AGENT_SYSPROMPT, ACTION_SPACE_INSTRUCTION
|
|
6
|
+
from datetime import date, datetime
|
|
7
|
+
from collections.abc import Mapping
|
|
8
|
+
|
|
9
|
+
from dt_arena.src.types.hooks import ToolCallContext
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DecideActionNode(Node):
    """
    Node that decides the next action based on current context.

    This node implements the "Think" and "Act" parts of ReAct:
    - Analyzes the current context (user query, previous actions, observations)
    - Decides whether to use a tool or provide a final answer
    - If using a tool, specifies which tool and what parameters
    """

    # Actions the LLM may choose; any other value is reported as a parse error.
    _VALID_ACTIONS = ("use_tool", "answer")

    def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
        """Collect everything ``exec`` needs from the shared store.

        ``max_turns``, ``turn_count`` and ``model`` are required keys (a
        missing one raises ``KeyError``); the remaining keys fall back to
        empty defaults.
        """
        return {
            "system_prompt": shared.get("system_prompt", ""),
            "user_query": shared.get("user_query", ""),
            "available_tools": shared.get("available_tools", []),
            "max_turns": shared["max_turns"],
            "turn_count": shared["turn_count"],
            "model": shared["model"],
            "message_history": shared.get("message_history", []),
        }

    def exec(self, prep_res: Dict[str, Any]) -> Dict[str, Any]:
        """Use LLM to decide next action.

        Args:
            prep_res: The dict produced by ``prep``.

        Returns:
            A decision dict whose ``action`` is ``"use_tool"`` or ``"answer"``,
            or ``"parse_error"`` when the LLM response could not be parsed or
            validated (``post`` turns that into a retry).
        """
        user_given_system_prompt = prep_res["system_prompt"]
        user_query = prep_res["user_query"]
        available_tools = prep_res["available_tools"]
        max_turns = prep_res["max_turns"]
        turn_count = prep_res["turn_count"]
        llm_model = prep_res["model"]
        # NOTE: this list is mutated in place below; when it is the list stored
        # in the shared store, the appended messages persist across calls.
        message_history = prep_res["message_history"]

        if turn_count >= max_turns:
            return {
                "action": "answer",
                "reasoning": "Maximum iterations reached, providing final answer",
                "tool_name": None,
                "tool_arguments": None,
            }

        # Initialize message history on first call, or append new user query for multi-turn
        if not message_history:
            # First call: build system message and add user query
            system_content = REACT_AGENT_SYSPROMPT.format(
                user_given_system_prompt=user_given_system_prompt,
                tools_text=self._format_tools(available_tools),
                action_space_instruction=ACTION_SPACE_INSTRUCTION,
            )
            message_history.append({"role": "system", "content": system_content})
            message_history.append({"role": "user", "content": user_query})
        else:
            # Multi-turn: only add the user query if it is not already the last message
            last_msg = message_history[-1]
            already_last = bool(
                last_msg
                and last_msg.get("role") == "user"
                and last_msg.get("content") == user_query
            )
            if not already_last:
                message_history.append({"role": "user", "content": user_query})

        # Call LLM with message history
        response = call_llm(messages=message_history, model=llm_model)
        print('@'*40)
        print('[DEBUG] LLM Raw Response:', response)
        print('@'*40)

        try:
            result = yaml.safe_load(self._extract_yaml_block(response))
            result = self.convert_dt_to_str(result)
            return self._validate_decision(result)
        except Exception as e:
            # Deliberately broad: any parse/validation failure becomes a
            # "parse_error" marker so that post() can ask the LLM to retry.
            print(f"[PocketFlow] Warning: Failed to parse LLM response: {e}")
            print(f"[PocketFlow] Raw response: {response}")
            print("[PocketFlow] Message history:")
            for msg in message_history:
                role = msg.get("role", "unknown")
                # Guard against None / non-string content so that the logging
                # itself cannot raise from inside the error handler.
                content = str(msg.get("content") or "")[:100].replace("\n", " ")
                print(f"[PocketFlow]   [{role}] {content}...")
            return {
                "action": "parse_error",
                "error_message": f"Failed to parse your response: {e}\n\nRaw response:\n{response}",
                "reasoning": None,
                "tool_name": None,
                "tool_arguments": None,
                "final_answer": None,
            }

    @staticmethod
    def _extract_yaml_block(response: str) -> str:
        """Return the YAML payload of *response*, preferring a fenced block."""
        if "```yaml" in response:
            return response.split("```yaml")[1].split("```")[0].strip()
        if "```" in response:
            return response.split("```")[1].split("```")[0].strip()
        return response.strip()

    def _validate_decision(self, result: Any) -> Dict[str, Any]:
        """Check a parsed decision and fill in defaults; raise ValueError if invalid."""
        if not isinstance(result, dict):
            raise ValueError("Response is not a valid dictionary")

        # A non-null final answer always means the agent is answering.
        if result.get("final_answer") is not None:
            result["action"] = "answer"

        if "action" not in result:
            raise ValueError("Response missing 'action' field")

        if result["action"] not in self._VALID_ACTIONS:
            raise ValueError(
                f"Invalid action: {result['action']}. Must be 'use_tool' or 'answer'"
            )

        if result["action"] == "use_tool":
            if result.get("tool_name") is None:
                raise ValueError(
                    "Action is 'use_tool' but 'tool_name' is missing or null"
                )
            # Treat an explicit `tool_arguments: null` like a missing key so
            # downstream code always receives a dict.
            if result.get("tool_arguments") is None:
                result["tool_arguments"] = {}
        elif result.get("final_answer") is None:
            raise ValueError(
                "Action is 'answer' but 'final_answer' is missing or null"
            )

        return result

    def post(
        self, shared: Dict[str, Any], prep_res: Dict[str, Any], exec_res: Dict[str, Any]
    ) -> str:
        """Store the decision and determine next action.

        Returns:
            ``"retry"`` after a parse error (feedback is appended to the
            message history first), otherwise the decision's ``action``.
        """
        # Handle parse errors: provide feedback and retry
        if exec_res.get("action") == "parse_error":
            error_msg = exec_res.get("error_message", "Unknown parsing error")

            # Observation with the error and an action space reminder
            observation = (
                "ERROR: Your previous response could not be parsed.\n\n"
                f"{error_msg}\n\n"
                "Please review the action space and response format requirements below, and try again:\n\n"
                f"{ACTION_SPACE_INSTRUCTION}\n\n"
                "Please provide a valid response following the exact format specified above."
            )

            # Add observation to message history (as user feedback)
            shared["message_history"].append({
                "role": "user",
                "content": observation,
            })

            print("[DEBUG] Parse error detected, retrying with action space reminder")

            # Return "retry" to loop back to DecideActionNode
            return "retry"

        # Normal flow: store decision and continue
        shared["current_decision"] = exec_res

        # Add reasoning to trajectory (for logging/debugging)
        shared["trajectory"].append({
            "type": "thought",
            "content": exec_res.get("reasoning", "No reasoning provided"),
        })

        # Add the decision to message history as the assistant message,
        # re-serialized as fenced YAML for consistency with the expected format.
        response_yaml = yaml.dump(
            exec_res, default_flow_style=False, allow_unicode=True, sort_keys=False
        )
        shared["message_history"].append({
            "role": "assistant",
            "content": f"```yaml\n{response_yaml}```",
        })

        print(f"[DEBUG] Decision made: {exec_res}")

        # Return the action to take
        return exec_res["action"]

    def convert_dt_to_str(self, obj):
        """Recursively replace ``date``/``datetime`` values with ISO-8601 strings.

        PyYAML parses unquoted timestamps into date objects; this converts
        them back to strings. Mappings (keys and values) and lists are
        rebuilt; every other object is returned unchanged.
        """
        if isinstance(obj, (datetime, date)):
            return obj.isoformat()

        if isinstance(obj, Mapping):
            return {
                self.convert_dt_to_str(k): self.convert_dt_to_str(v)
                for k, v in obj.items()
            }

        # YAML sequences load as lists; dates nested inside them need converting too.
        if isinstance(obj, list):
            return [self.convert_dt_to_str(item) for item in obj]

        return obj

    def _format_tools(self, tools: List[Dict[str, Any]]) -> str:
        """Format available tools for the prompt.

        Each tool is rendered as a ``## name`` heading, its description and,
        when its ``inputSchema`` declares properties, a parameter list.
        """
        if not tools:
            return "No tools available."

        formatted = []
        for tool in tools:
            name = tool.get("name", "unknown")
            description = tool.get("description", "No description")
            # `or {}` also covers a schema that is present but null.
            input_schema = tool.get("inputSchema") or {}

            formatted.append(f"## {name}")
            formatted.append(f"Description: {description}")

            # Format parameters
            properties = input_schema.get("properties") or {}
            required = input_schema.get("required") or []

            if properties:
                formatted.append("Parameters:")
                for param_name, param_info in properties.items():
                    param_type = param_info.get("type", "any")
                    param_desc = param_info.get("description", "")
                    is_required = " (required)" if param_name in required else ""
                    formatted.append(
                        f"  - {param_name} ({param_type}){is_required}: {param_desc}"
                    )
            formatted.append("")

        return "\n".join(formatted)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class ExecuteToolNode(Node):
|
|
242
|
+
"""
|
|
243
|
+
Node that executes a tool call via MCP.
|
|
244
|
+
|
|
245
|
+
This node implements the "Observe" part of ReAct:
|
|
246
|
+
- Looks up the server for the tool from tool_to_server mapping
|
|
247
|
+
- Calls the specified tool through the appropriate MCP client
|
|
248
|
+
- Records the result as an observation with server info
|
|
249
|
+
"""
|
|
250
|
+
|
|
251
|
+
def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
    """Prepare tool execution parameters.

    Reads the current decision from the shared store, increments
    ``shared["turn_count"]`` and resolves the server for the chosen tool
    via the ``tool_to_server`` mapping.

    Returns:
        Dict with ``tool_name``, ``server_name``, ``tool_arguments``,
        ``mcp_servers``, ``async_helper`` and ``hook_manager``.
    """
    # `.get(key, default)` does not cover keys that are present but None,
    # so normalize those explicitly.
    decision = shared.get("current_decision") or {}
    shared["turn_count"] += 1

    tool_name = decision.get("tool_name") or ""
    tool_to_server = shared.get("tool_to_server") or {}

    # Look up server for this tool
    server_name = tool_to_server.get(tool_name)

    # The decision may carry an explicit `tool_arguments: None`; downstream
    # code expects a dict.
    tool_arguments = decision.get("tool_arguments")
    if tool_arguments is None:
        tool_arguments = {}

    return {
        "tool_name": tool_name,
        "server_name": server_name,
        "tool_arguments": tool_arguments,
        "mcp_servers": shared.get("mcp_servers") or {},
        "async_helper": shared.get("async_helper"),
        "hook_manager": shared.get("hook_manager"),
    }
|
|
270
|
+
|
|
271
|
+
def exec(self, prep_res: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execute the tool via MCP client.

    Uses AsyncHelper to run the async MCP call in a sync context.
    Creates a fresh connection for each call to avoid cross-thread issues.

    Args:
        prep_res: Prepared tool parameters including mcp_servers and async_helper

    Returns:
        Dict with ``result`` (the tool output, or an ``"Error: ..."`` /
        ``"Error executing tool ..."`` message) and ``server`` (the server
        name, or None when no server was resolved). Failures are reported
        through ``result`` rather than raised.
    """
    tool_name = prep_res["tool_name"]
    server_name = prep_res["server_name"]
    tool_arguments = prep_res["tool_arguments"]
    mcp_servers = prep_res["mcp_servers"]
    async_helper = prep_res["async_helper"]
    hook_manager = prep_res["hook_manager"]

    if not mcp_servers:
        return {"result": "Error: No MCP servers available", "server": None}

    if not tool_name:
        return {"result": "Error: No tool name specified", "server": None}

    if not async_helper:
        return {"result": "Error: Async helper not available", "server": None}

    if not server_name or server_name not in mcp_servers:
        return {
            "result": f"Error: Server not found for tool '{tool_name}'",
            "server": server_name,
        }

    # Get the server URL for creating a fresh connection. getattr keeps a
    # server entry without a `url` attribute on the regular error path
    # instead of raising AttributeError out of this method.
    server_url = getattr(mcp_servers[server_name], "url", None)

    if not server_url:
        return {
            "result": f"Error: No URL configured for server '{server_name}'",
            "server": server_name,
        }

    # A decision with `tool_arguments: None` must still call the tool with
    # an (empty) argument dict.
    if tool_arguments is None:
        tool_arguments = {}

    try:
        # Call the tool asynchronously using the helper with a fresh connection
        result = async_helper.run_async(
            self._call_tool_async(
                server_url, tool_name, tool_arguments, server_name, hook_manager
            )
        )
    except Exception as e:
        # Deliberately broad: the error text is handed back as the tool result.
        return {
            "result": f"Error executing tool '{tool_name}' on server '{server_name}': {e}",
            "server": server_name,
        }
    return {"result": result, "server": server_name}
|
|
329
|
+
|
|
330
|
+
async def _call_tool_async(
    self,
    server_url: str,
    tool_name: str,
    tool_arguments: Dict[str, Any],
    server_name: str,
    hook_manager,
) -> str:
    """
    Call a tool through a fresh MCP client connection.

    We create a new connection for each tool call to avoid cross-thread
    event loop issues when using AsyncHelper.

    Args:
        server_url: The MCP server URL to connect to
        tool_name: Name of the tool to call
        tool_arguments: Arguments to pass to the tool
        server_name: Server name recorded in the ToolCallContext for this call
        hook_manager: Object whose awaitable ``wrap(ctx, fn)`` performs the
            call; ``fn`` receives the arguments to forward to the tool

    Returns:
        The tool's result as a string

    Raises:
        RuntimeError: If anything inside the connection/call/formatting
            fails. The original exception is attached as ``__cause__``.
    """
    from fastmcp import Client

    try:
        # Create a fresh connection for this call
        async with Client(server_url) as client:
            ctx = ToolCallContext(
                framework="pocketflow",
                server=server_name,
                tool_name=tool_name,
                arguments=tool_arguments,
            )
            # The hook manager decides which arguments actually reach the tool,
            # so the call is deferred behind a callable taking those arguments.
            result = await hook_manager.wrap(
                ctx,
                lambda args: client.call_tool(tool_name, args),
            )

            if not hasattr(result, "content"):
                return str(result)

            # Flatten each content item to text: prefer .text, then .data,
            # otherwise fall back to the item's string form.
            content_parts = []
            for item in result.content:
                if hasattr(item, "text"):
                    content_parts.append(item.text)
                elif hasattr(item, "data"):
                    content_parts.append(str(item.data))
                else:
                    content_parts.append(str(item))

            # An empty content list still yields something readable.
            return "\n".join(content_parts) if content_parts else str(result)

    except Exception as e:
        # Chain explicitly so the underlying failure stays visible in tracebacks.
        raise RuntimeError(f"MCP call failed: {e}") from e
|
|
384
|
+
|
|
385
|
+
def post(
    self, shared: Dict[str, Any], prep_res: Dict[str, Any], exec_res: Dict[str, Any]
) -> str:
    """
    Record the tool call and its outcome, then route back to the decision step.

    Args:
        shared: Shared store; its "trajectory" and "message_history" lists
            are appended to in place
        prep_res: Prepared parameters; "tool_name" and "tool_arguments" are read
        exec_res: Execution output; "result" (default "") and "server"
            (default None) are read

    Returns:
        The action string "decide"
    """
    outcome = exec_res.get("result", "")
    origin = exec_res.get("server")
    called_tool = prep_res["tool_name"]

    # Trajectory entries (logging/debugging): the action first, then what it produced.
    action_record = {
        "type": "action",
        "tool_name": called_tool,
        "tool_arguments": prep_res["tool_arguments"],
        "server": origin,
    }
    shared["trajectory"].append(action_record)

    observation_record = {
        "type": "observation",
        "content": outcome,
        "tool_name": called_tool,
        "server": origin,
    }
    shared["trajectory"].append(observation_record)

    # The observation is fed back into the conversation as a user message.
    shared["message_history"].append(
        {"role": "user", "content": f"Tool execution result:\n{outcome}"}
    )

    preview = outcome[:200]
    print(f"[DEBUG] Observation added from server '{origin}': {preview}...")

    # Hand control back to the decision node for the next iteration.
    return "decide"
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
class FinalAnswerNode(Node):
    """
    Terminal node that publishes the agent's final answer.

    The flow arrives here once the agent has decided to provide a final answer.
    """

    def prep(self, shared: Dict[str, Any]) -> str:
        """
        Read the final answer out of the current decision.

        Args:
            shared: The shared store; "current_decision" is looked up in it

        Returns:
            The decision's "final_answer", or "No answer provided" when the
            decision or the answer key is missing
        """
        return shared.get("current_decision", {}).get(
            "final_answer", "No answer provided"
        )

    def exec(self, prep_res: str) -> str:
        """
        Pass the answer through unchanged; there is nothing to compute.

        Args:
            prep_res: The final answer produced by prep()

        Returns:
            The same answer string
        """
        return prep_res

    def post(
        self, shared: Dict[str, Any], prep_res: str, exec_res: str
    ) -> Optional[str]:
        """Save the final answer in the shared store and end the flow."""
        shared["final_answer"] = exec_res

        # Mirror the answer into the trajectory as its closing entry.
        shared["trajectory"].append({"type": "final_answer", "content": exec_res})

        # Returning None ends the flow.
        return None
|
benchmark/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{"domain": "browser", "type": "benign", "task_id": "1"}
|
|
2
|
+
{"domain": "browser", "type": "benign", "task_id": "2"}
|
|
3
|
+
{"domain": "browser", "type": "benign", "task_id": "3"}
|
|
4
|
+
{"domain": "browser", "type": "benign", "task_id": "4"}
|
|
5
|
+
{"domain": "browser", "type": "benign", "task_id": "5"}
|
|
6
|
+
{"domain": "browser", "type": "benign", "task_id": "6"}
|
|
7
|
+
{"domain": "browser", "type": "benign", "task_id": "7"}
|
|
8
|
+
{"domain": "browser", "type": "benign", "task_id": "8"}
|
|
9
|
+
{"domain": "browser", "type": "benign", "task_id": "9"}
|
|
10
|
+
{"domain": "browser", "type": "benign", "task_id": "10"}
|
|
11
|
+
{"domain": "browser", "type": "benign", "task_id": "11"}
|
|
12
|
+
{"domain": "browser", "type": "benign", "task_id": "12"}
|
|
13
|
+
{"domain": "browser", "type": "benign", "task_id": "13"}
|
|
14
|
+
{"domain": "browser", "type": "benign", "task_id": "14"}
|
|
15
|
+
{"domain": "browser", "type": "benign", "task_id": "15"}
|
|
16
|
+
{"domain": "browser", "type": "benign", "task_id": "16"}
|
|
17
|
+
{"domain": "browser", "type": "benign", "task_id": "17"}
|
|
18
|
+
{"domain": "browser", "type": "benign", "task_id": "18"}
|
|
19
|
+
{"domain": "browser", "type": "benign", "task_id": "19"}
|
|
20
|
+
{"domain": "browser", "type": "benign", "task_id": "20"}
|
|
21
|
+
{"domain": "browser", "type": "benign", "task_id": "21"}
|
|
22
|
+
{"domain": "browser", "type": "benign", "task_id": "22"}
|
|
23
|
+
{"domain": "browser", "type": "benign", "task_id": "23"}
|
|
24
|
+
{"domain": "browser", "type": "benign", "task_id": "24"}
|
|
25
|
+
{"domain": "browser", "type": "benign", "task_id": "25"}
|
|
26
|
+
{"domain": "browser", "type": "benign", "task_id": "26"}
|
|
27
|
+
{"domain": "browser", "type": "benign", "task_id": "27"}
|
|
28
|
+
{"domain": "browser", "type": "benign", "task_id": "28"}
|
|
29
|
+
{"domain": "browser", "type": "benign", "task_id": "29"}
|
|
30
|
+
{"domain": "browser", "type": "benign", "task_id": "30"}
|
|
31
|
+
{"domain": "browser", "type": "benign", "task_id": "31"}
|
|
32
|
+
{"domain": "browser", "type": "benign", "task_id": "32"}
|
|
33
|
+
{"domain": "browser", "type": "benign", "task_id": "33"}
|
|
34
|
+
{"domain": "browser", "type": "benign", "task_id": "34"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "1"}
|
|
2
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "2"}
|
|
3
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "3"}
|
|
4
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "4"}
|
|
5
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "5"}
|
|
6
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "6"}
|
|
7
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "7"}
|
|
8
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "browser-integrity", "task_id": "8"}
|
|
9
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "1"}
|
|
10
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "2"}
|
|
11
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "3"}
|
|
12
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "4"}
|
|
13
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "5"}
|
|
14
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "6"}
|
|
15
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "7"}
|
|
16
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "8"}
|
|
17
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "10"}
|
|
18
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "11"}
|
|
19
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "12"}
|
|
20
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "13"}
|
|
21
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "15"}
|
|
22
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "16"}
|
|
23
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "data-exfiltration", "task_id": "17"}
|
|
24
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "1"}
|
|
25
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "2"}
|
|
26
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "3"}
|
|
27
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "4"}
|
|
28
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "5"}
|
|
29
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "6"}
|
|
30
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "8"}
|
|
31
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "10"}
|
|
32
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "12"}
|
|
33
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "13"}
|
|
34
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "15"}
|
|
35
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "16"}
|
|
36
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "17"}
|
|
37
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "21"}
|
|
38
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "22"}
|
|
39
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "24"}
|
|
40
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "harmful-illegal", "task_id": "25"}
|
|
41
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "1"}
|
|
42
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "2"}
|
|
43
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "3"}
|
|
44
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "4"}
|
|
45
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "5"}
|
|
46
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "6"}
|
|
47
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "7"}
|
|
48
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "8"}
|
|
49
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "9"}
|
|
50
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "10"}
|
|
51
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "11"}
|
|
52
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "12"}
|
|
53
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "13"}
|
|
54
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "14"}
|
|
55
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "15"}
|
|
56
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "16"}
|
|
57
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "17"}
|
|
58
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "18"}
|
|
59
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "19"}
|
|
60
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "20"}
|
|
61
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "21"}
|
|
62
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "22"}
|
|
63
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "23"}
|
|
64
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "24"}
|
|
65
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "manipulative-ai-use", "task_id": "25"}
|
|
66
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "3"}
|
|
67
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "4"}
|
|
68
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "5"}
|
|
69
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "6"}
|
|
70
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "7"}
|
|
71
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "9"}
|
|
72
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "10"}
|
|
73
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "11"}
|
|
74
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "12"}
|
|
75
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "13"}
|
|
76
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "14"}
|
|
77
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "15"}
|
|
78
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "17"}
|
|
79
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "18"}
|
|
80
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "19"}
|
|
81
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "21"}
|
|
82
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "22"}
|
|
83
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "23"}
|
|
84
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "24"}
|
|
85
|
+
{"domain": "browser", "type": "malicious", "threat_model": "direct", "risk_category": "spam-bot", "task_id": "25"}
|