hackagent 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hackagent/__init__.py +12 -0
- hackagent/agent.py +214 -0
- hackagent/api/__init__.py +1 -0
- hackagent/api/agent/__init__.py +1 -0
- hackagent/api/agent/agent_create.py +347 -0
- hackagent/api/agent/agent_destroy.py +140 -0
- hackagent/api/agent/agent_list.py +242 -0
- hackagent/api/agent/agent_partial_update.py +361 -0
- hackagent/api/agent/agent_retrieve.py +235 -0
- hackagent/api/agent/agent_update.py +361 -0
- hackagent/api/apilogs/__init__.py +1 -0
- hackagent/api/apilogs/apilogs_list.py +170 -0
- hackagent/api/apilogs/apilogs_retrieve.py +162 -0
- hackagent/api/attack/__init__.py +1 -0
- hackagent/api/attack/attack_create.py +275 -0
- hackagent/api/attack/attack_destroy.py +146 -0
- hackagent/api/attack/attack_list.py +254 -0
- hackagent/api/attack/attack_partial_update.py +289 -0
- hackagent/api/attack/attack_retrieve.py +247 -0
- hackagent/api/attack/attack_update.py +289 -0
- hackagent/api/checkout/__init__.py +1 -0
- hackagent/api/checkout/checkout_create.py +225 -0
- hackagent/api/generate/__init__.py +1 -0
- hackagent/api/generate/generate_create.py +253 -0
- hackagent/api/judge/__init__.py +1 -0
- hackagent/api/judge/judge_create.py +253 -0
- hackagent/api/key/__init__.py +1 -0
- hackagent/api/key/key_create.py +179 -0
- hackagent/api/key/key_destroy.py +103 -0
- hackagent/api/key/key_list.py +170 -0
- hackagent/api/key/key_retrieve.py +162 -0
- hackagent/api/organization/__init__.py +1 -0
- hackagent/api/organization/organization_create.py +208 -0
- hackagent/api/organization/organization_destroy.py +104 -0
- hackagent/api/organization/organization_list.py +170 -0
- hackagent/api/organization/organization_me_retrieve.py +126 -0
- hackagent/api/organization/organization_partial_update.py +222 -0
- hackagent/api/organization/organization_retrieve.py +163 -0
- hackagent/api/organization/organization_update.py +222 -0
- hackagent/api/prompt/__init__.py +1 -0
- hackagent/api/prompt/prompt_create.py +171 -0
- hackagent/api/prompt/prompt_destroy.py +104 -0
- hackagent/api/prompt/prompt_list.py +185 -0
- hackagent/api/prompt/prompt_partial_update.py +185 -0
- hackagent/api/prompt/prompt_retrieve.py +163 -0
- hackagent/api/prompt/prompt_update.py +185 -0
- hackagent/api/result/__init__.py +1 -0
- hackagent/api/result/result_create.py +175 -0
- hackagent/api/result/result_destroy.py +106 -0
- hackagent/api/result/result_list.py +249 -0
- hackagent/api/result/result_partial_update.py +193 -0
- hackagent/api/result/result_retrieve.py +167 -0
- hackagent/api/result/result_trace_create.py +177 -0
- hackagent/api/result/result_update.py +189 -0
- hackagent/api/run/__init__.py +1 -0
- hackagent/api/run/run_create.py +187 -0
- hackagent/api/run/run_destroy.py +112 -0
- hackagent/api/run/run_list.py +291 -0
- hackagent/api/run/run_partial_update.py +201 -0
- hackagent/api/run/run_result_create.py +177 -0
- hackagent/api/run/run_retrieve.py +179 -0
- hackagent/api/run/run_run_tests_create.py +187 -0
- hackagent/api/run/run_update.py +201 -0
- hackagent/api/user/__init__.py +1 -0
- hackagent/api/user/user_create.py +212 -0
- hackagent/api/user/user_destroy.py +106 -0
- hackagent/api/user/user_list.py +174 -0
- hackagent/api/user/user_me_retrieve.py +126 -0
- hackagent/api/user/user_me_update.py +196 -0
- hackagent/api/user/user_partial_update.py +226 -0
- hackagent/api/user/user_retrieve.py +167 -0
- hackagent/api/user/user_update.py +226 -0
- hackagent/attacks/AdvPrefix/__init__.py +41 -0
- hackagent/attacks/AdvPrefix/completions.py +416 -0
- hackagent/attacks/AdvPrefix/config.py +259 -0
- hackagent/attacks/AdvPrefix/evaluation.py +745 -0
- hackagent/attacks/AdvPrefix/evaluators.py +564 -0
- hackagent/attacks/AdvPrefix/generate.py +711 -0
- hackagent/attacks/AdvPrefix/utils.py +307 -0
- hackagent/attacks/__init__.py +35 -0
- hackagent/attacks/advprefix.py +507 -0
- hackagent/attacks/base.py +106 -0
- hackagent/attacks/strategies.py +906 -0
- hackagent/cli/__init__.py +19 -0
- hackagent/cli/commands/__init__.py +20 -0
- hackagent/cli/commands/agent.py +100 -0
- hackagent/cli/commands/attack.py +417 -0
- hackagent/cli/commands/config.py +301 -0
- hackagent/cli/commands/results.py +327 -0
- hackagent/cli/config.py +249 -0
- hackagent/cli/main.py +515 -0
- hackagent/cli/tui/__init__.py +31 -0
- hackagent/cli/tui/actions_logger.py +200 -0
- hackagent/cli/tui/app.py +288 -0
- hackagent/cli/tui/base.py +137 -0
- hackagent/cli/tui/logger.py +318 -0
- hackagent/cli/tui/views/__init__.py +33 -0
- hackagent/cli/tui/views/agents.py +488 -0
- hackagent/cli/tui/views/attacks.py +624 -0
- hackagent/cli/tui/views/config.py +244 -0
- hackagent/cli/tui/views/dashboard.py +307 -0
- hackagent/cli/tui/views/results.py +1210 -0
- hackagent/cli/tui/widgets/__init__.py +24 -0
- hackagent/cli/tui/widgets/actions.py +346 -0
- hackagent/cli/tui/widgets/logs.py +435 -0
- hackagent/cli/utils.py +276 -0
- hackagent/client.py +286 -0
- hackagent/errors.py +37 -0
- hackagent/logger.py +83 -0
- hackagent/models/__init__.py +109 -0
- hackagent/models/agent.py +223 -0
- hackagent/models/agent_request.py +129 -0
- hackagent/models/api_token_log.py +184 -0
- hackagent/models/attack.py +154 -0
- hackagent/models/attack_request.py +82 -0
- hackagent/models/checkout_session_request_request.py +76 -0
- hackagent/models/checkout_session_response.py +59 -0
- hackagent/models/choice.py +81 -0
- hackagent/models/choice_message.py +67 -0
- hackagent/models/evaluation_status_enum.py +14 -0
- hackagent/models/generate_error_response.py +59 -0
- hackagent/models/generate_request_request.py +212 -0
- hackagent/models/generate_success_response.py +115 -0
- hackagent/models/generic_error_response.py +70 -0
- hackagent/models/message_request.py +67 -0
- hackagent/models/organization.py +102 -0
- hackagent/models/organization_minimal.py +68 -0
- hackagent/models/organization_request.py +71 -0
- hackagent/models/paginated_agent_list.py +123 -0
- hackagent/models/paginated_api_token_log_list.py +123 -0
- hackagent/models/paginated_attack_list.py +123 -0
- hackagent/models/paginated_organization_list.py +123 -0
- hackagent/models/paginated_prompt_list.py +123 -0
- hackagent/models/paginated_result_list.py +123 -0
- hackagent/models/paginated_run_list.py +123 -0
- hackagent/models/paginated_user_api_key_list.py +123 -0
- hackagent/models/paginated_user_profile_list.py +123 -0
- hackagent/models/patched_agent_request.py +128 -0
- hackagent/models/patched_attack_request.py +92 -0
- hackagent/models/patched_organization_request.py +71 -0
- hackagent/models/patched_prompt_request.py +125 -0
- hackagent/models/patched_result_request.py +237 -0
- hackagent/models/patched_run_request.py +138 -0
- hackagent/models/patched_user_profile_request.py +99 -0
- hackagent/models/prompt.py +220 -0
- hackagent/models/prompt_request.py +126 -0
- hackagent/models/result.py +294 -0
- hackagent/models/result_list_evaluation_status.py +14 -0
- hackagent/models/result_request.py +232 -0
- hackagent/models/run.py +233 -0
- hackagent/models/run_list_status.py +12 -0
- hackagent/models/run_request.py +133 -0
- hackagent/models/status_enum.py +12 -0
- hackagent/models/step_type_enum.py +14 -0
- hackagent/models/trace.py +121 -0
- hackagent/models/trace_request.py +94 -0
- hackagent/models/usage.py +75 -0
- hackagent/models/user_api_key.py +201 -0
- hackagent/models/user_api_key_request.py +73 -0
- hackagent/models/user_profile.py +135 -0
- hackagent/models/user_profile_minimal.py +76 -0
- hackagent/models/user_profile_request.py +99 -0
- hackagent/router/__init__.py +25 -0
- hackagent/router/adapters/__init__.py +20 -0
- hackagent/router/adapters/base.py +63 -0
- hackagent/router/adapters/google_adk.py +671 -0
- hackagent/router/adapters/litellm_adapter.py +524 -0
- hackagent/router/adapters/openai_adapter.py +426 -0
- hackagent/router/router.py +969 -0
- hackagent/router/types.py +54 -0
- hackagent/tracking/__init__.py +42 -0
- hackagent/tracking/context.py +163 -0
- hackagent/tracking/decorators.py +299 -0
- hackagent/tracking/tracker.py +441 -0
- hackagent/types.py +54 -0
- hackagent/utils.py +194 -0
- hackagent/vulnerabilities/__init__.py +13 -0
- hackagent/vulnerabilities/prompts.py +81 -0
- hackagent-0.3.1.dist-info/METADATA +122 -0
- hackagent-0.3.1.dist-info/RECORD +183 -0
- hackagent-0.3.1.dist-info/WHEEL +4 -0
- hackagent-0.3.1.dist-info/entry_points.txt +2 -0
- hackagent-0.3.1.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,906 @@
|
|
|
1
|
+
# Copyright 2025 - AI4I. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
Attack strategy implementations using the Strategy pattern.
|
|
17
|
+
|
|
18
|
+
This module provides different attack strategies that can be executed against victim agents.
|
|
19
|
+
The Strategy pattern allows for dynamic selection and execution of various attack methodologies,
|
|
20
|
+
each with their own specific configurations and execution logic.
|
|
21
|
+
|
|
22
|
+
The module includes:
|
|
23
|
+
- Abstract base class `AttackStrategy` defining the interface
|
|
24
|
+
- Concrete implementations like `AdvPrefix` for adversarial prefix attacks
|
|
25
|
+
- Helper methods for HTTP response handling and data parsing
|
|
26
|
+
- Integration with the HackAgent backend API for attack execution and result tracking
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
import abc
|
|
30
|
+
import json # For ManagedAttackStrategy
|
|
31
|
+
import logging
|
|
32
|
+
import os # Added for path joining
|
|
33
|
+
from http import HTTPStatus # Added for checking 201 status
|
|
34
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
|
35
|
+
from uuid import UUID # Added import
|
|
36
|
+
|
|
37
|
+
import httpx # Added for manual HTTP call in AdvPrefix
|
|
38
|
+
import pandas as pd # For AdvPrefix
|
|
39
|
+
|
|
40
|
+
# Imports for specific strategies, moved from agent.py or direct_test_executor.py
|
|
41
|
+
from hackagent import errors # Import the errors module
|
|
42
|
+
from hackagent.api.attack.attack_create import (
|
|
43
|
+
sync_detailed as attacks_create_sync_detailed,
|
|
44
|
+
)
|
|
45
|
+
from hackagent.api.run import run_run_tests_create
|
|
46
|
+
from hackagent.attacks.advprefix import (
|
|
47
|
+
AdvPrefixAttack,
|
|
48
|
+
) # Used by LocalPrefix
|
|
49
|
+
from hackagent.errors import HackAgentError
|
|
50
|
+
from hackagent.models import Run
|
|
51
|
+
from hackagent.models.attack_request import (
|
|
52
|
+
AttackRequest,
|
|
53
|
+
) # For creating attacks via attacks_create API
|
|
54
|
+
from hackagent.models.run_request import RunRequest
|
|
55
|
+
|
|
56
|
+
if TYPE_CHECKING:
|
|
57
|
+
from hackagent.agent import HackAgent
|
|
58
|
+
|
|
59
|
+
logger = logging.getLogger(__name__)
|
|
60
|
+
|
|
61
|
+
# --- Strategy Pattern for Attacks ---
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AttackStrategy(abc.ABC):
    """
    Abstract base class for implementing attack strategies using the Strategy pattern.

    Concrete strategies implement `execute`. This base class also provides
    shared helpers for decoding an API response body, parsing it as JSON and
    extracting the attack/run identifiers from the parsed data.

    Attributes:
        hack_agent: Reference to the HackAgent instance that owns this strategy.
        client: Authenticated client for API communication (taken from
            `hack_agent.client`).
    """

    def __init__(self, hack_agent: "HackAgent"):
        """
        Initialize the attack strategy with a reference to the parent HackAgent.

        Args:
            hack_agent: The HackAgent instance that will use this strategy.
                Its `client` attribute is stored on the strategy as well.
        """
        self.hack_agent = hack_agent
        self.client = hack_agent.client

    @abc.abstractmethod
    def execute(
        self,
        attack_config: Dict[str, Any],
        run_config_override: Optional[Dict[str, Any]],
        fail_on_run_error: bool,
        max_wait_time_seconds: Optional[int] = None,
        poll_interval_seconds: Optional[int] = None,
        _tui_app: Optional[Any] = None,
        _tui_log_callback: Optional[Any] = None,
    ) -> Any:
        """
        Execute the attack strategy with the provided configuration.

        This abstract method must be overridden by every concrete strategy
        class; a subclass that does not override it cannot be instantiated
        (`abc` raises TypeError at instantiation time).

        Args:
            attack_config: Configuration dictionary containing attack-specific parameters.
                Must include 'attack_type' and other parameters specific to the strategy.
            run_config_override: Optional configuration overrides for the attack run.
                Can be used to modify default run parameters.
            fail_on_run_error: Whether to raise an exception if the attack run fails.
                If False, errors may be handled gracefully depending on the strategy.
            max_wait_time_seconds: Maximum time to wait for attack completion.
                Not used by all strategies.
            poll_interval_seconds: Interval for polling attack status.
                Not used by all strategies.
            _tui_app: Optional Textual App instance for TUI logging integration.
                Internal parameter used when running attacks from the TUI.
            _tui_log_callback: Optional callback function for TUI log handling.
                Internal parameter used when running attacks from the TUI.

        Returns:
            Strategy-specific results. The format varies by implementation.

        Raises:
            HackAgentError: For various attack execution failures.
            ValueError: For invalid configuration parameters.
        """

    def _decode_response_content(self, response: httpx.Response) -> str:
        """
        Decode HTTP response content to a UTF-8 string with error handling.

        Args:
            response: The response object whose `content` bytes are decoded.

        Returns:
            The decoded content, or the placeholder 'N/A' if `content` is
            None or empty. Undecodable bytes are replaced rather than raising.
        """
        if not response.content:
            return "N/A"
        return response.content.decode("utf-8", errors="replace")

    def _parse_json_from_response_data(
        self,
        response: httpx.Response,
        decoded_content: str,
        attack_type_for_error_msg: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Parse a JSON object from an HTTP response, with a pre-parsed fallback.

        The decoded body is parsed first. Only a JSON *object* (a dict) is
        accepted; any other valid JSON value (list, string, number, ...) is
        logged and ignored so that callers can rely on the declared return
        type. If no dict was obtained from the body, the response's optional
        `parsed` attribute is consulted.

        Args:
            response: The response object to parse.
            decoded_content: The already decoded string content of the response.
            attack_type_for_error_msg: Descriptive string for error messages,
                typically the attack type being processed.

        Returns:
            The parsed dictionary if one could be obtained, otherwise None
            (or an empty dict if the body was an empty JSON object and no
            pre-parsed fallback was usable).

        Raises:
            HackAgentError: If the response status is 201 (Created) and a body
                is present but is not valid JSON.
        """
        parsed_data_dict: Optional[Dict[str, Any]] = None

        if response.content:
            try:
                loaded = json.loads(decoded_content)
            except json.JSONDecodeError as jde:
                # Critical for 201 if body exists but is bad JSON
                if response.status_code == HTTPStatus.CREATED:
                    logger.error(
                        f"Failed to parse JSON for {attack_type_for_error_msg} (201 response with content): {jde}. Content: {decoded_content}"
                    )
                    raise HackAgentError(
                        f"Failed to parse 201 response JSON for {attack_type_for_error_msg} (content present): {jde}"
                    ) from jde
                # exc_info=False to avoid verbose log for non-critical parse fail
                logger.warning(
                    f"Could not parse JSON from response body for {attack_type_for_error_msg} (status {response.status_code}). Content: {decoded_content}",
                    exc_info=False,
                )
                # Do not return yet, try pre-parsed attributes next
            else:
                if isinstance(loaded, dict):
                    parsed_data_dict = loaded
                else:
                    # Valid JSON but not an object: downstream code calls
                    # .get() on the result, so a non-dict must not leak out.
                    logger.warning(
                        f"JSON body for {attack_type_for_error_msg} (status {response.status_code}) is not a JSON object. Type: {type(loaded)}"
                    )

        # Try pre-parsed attributes, especially if content parsing failed or content was empty
        if not parsed_data_dict:
            pre_parsed = getattr(response, "parsed", None)
            if pre_parsed:
                logger.debug(
                    f"Attempting to use pre-parsed attribute for {attack_type_for_error_msg}"
                )
                extra_props = getattr(pre_parsed, "additional_properties", None)
                if isinstance(extra_props, dict):
                    parsed_data_dict = extra_props
                elif isinstance(pre_parsed, dict):
                    parsed_data_dict = pre_parsed
                else:
                    logger.warning(
                        f"Response has 'parsed' attribute but it's not a usable dict for {attack_type_for_error_msg}. Type: {type(pre_parsed)}"
                    )

        return parsed_data_dict

    def _get_parsed_data_from_initiate_response(
        self,
        response: httpx.Response,
        decoded_content: str,
        attack_type_for_error_msg: str,
    ) -> Dict[str, Any]:
        """
        Process an attack initiation response and extract parsed data.

        The body is parsed first (which may itself raise for a 201 with
        invalid JSON); the status code then decides how a missing result is
        reported.

        Args:
            response: The response object from an attack initiation request.
            decoded_content: Pre-decoded string content of the response.
            attack_type_for_error_msg: Descriptive string for error messages.

        Returns:
            A non-empty dictionary containing the parsed response data.

        Raises:
            HackAgentError: If the response indicates failure (status >= 300),
                if a 201 response lacks parseable data, or if any other
                status code is received without valid data.
        """
        parsed_data_dict = self._parse_json_from_response_data(
            response, decoded_content, attack_type_for_error_msg
        )
        status_code = response.status_code

        if status_code == HTTPStatus.CREATED:
            if not parsed_data_dict:
                # Either no content, a non-object JSON body, or an unusable
                # pre-parsed attribute. (Invalid JSON with content present
                # has already raised during parsing.)
                logger.error(
                    f"201 for {attack_type_for_error_msg} but no parsable dictionary body was found. Decoded content: '{decoded_content}', Pre-parsed type: {type(response.parsed if hasattr(response, 'parsed') else None)}"
                )
                raise HackAgentError(
                    f"201 for {attack_type_for_error_msg} but no parsable dictionary body was found."
                )
            return parsed_data_dict

        if status_code >= 300:
            err_text = f"Failed to initiate {attack_type_for_error_msg}. Status: {response.status_code}, Body: {decoded_content}"
            logger.error(err_text)
            raise HackAgentError(err_text)

        # Unexpected success status codes (e.g., 200 OK instead of 201 Created, or other 2xx)
        logger.warning(
            f"Unexpected success status {response.status_code} from initiate_{attack_type_for_error_msg}. Content: {decoded_content}"
        )
        if not parsed_data_dict:
            err_text = (
                f"Could not obtain parsable data from initiate_{attack_type_for_error_msg} response with unexpected status {response.status_code}. "
                f"Content: {decoded_content}"
            )
            logger.error(err_text)
            raise HackAgentError(err_text)
        return parsed_data_dict

    def _extract_ids_from_data_dict(
        self,
        parsed_data_dict: Dict[str, Any],
        attack_type_for_error_msg: str,
        original_content: str,
    ) -> Tuple[str, Optional[str]]:
        """
        Extract attack ID and optional run ID from a parsed response dictionary.

        Reads the mandatory 'id' key (the attack ID) and the optional
        'associated_run_id' key; both are converted with `str()` when present.

        Args:
            parsed_data_dict: Dictionary containing parsed response data.
            attack_type_for_error_msg: Descriptive string for error messages.
            original_content: Original response content string for error reporting.

        Returns:
            A tuple (attack_id, run_id). The attack_id is always a string,
            while run_id is None if 'associated_run_id' is absent or None.

        Raises:
            HackAgentError: If 'id' is missing or None.
        """
        raw_attack_id = parsed_data_dict.get("id")
        attack_id_str = str(raw_attack_id) if raw_attack_id is not None else None

        if attack_id_str is None:
            err_detail = (
                f"Could not extract mandatory attack_id ('{attack_id_str}') "
                f"from initiate_{attack_type_for_error_msg} response. "
                f"Source dict: {parsed_data_dict}, Original Decoded Content: '{original_content}'"
            )
            logger.error(err_detail)
            raise HackAgentError(err_detail)

        raw_run_id = parsed_data_dict.get("associated_run_id")
        run_id_str = str(raw_run_id) if raw_run_id is not None else None

        logger.info(
            f"Extracted Attack ID: {attack_id_str} and optional server-associated Run ID: {run_id_str if run_id_str else 'Not Provided'} for {attack_type_for_error_msg}."
        )
        return attack_id_str, run_id_str

    def extract_attack_and_run_ids_from_initiate_response(
        self, response: httpx.Response, attack_type_for_error_msg: str = "attack"
    ) -> Tuple[str, Optional[str]]:
        """
        Orchestrate the extraction of attack and run IDs from an attack creation response.

        Decodes the response body, obtains the parsed dictionary (raising on
        error statuses or unparsable bodies) and extracts the IDs from it.

        Args:
            response: The response object from an attack creation API call.
            attack_type_for_error_msg: Descriptive string for error messages,
                defaults to "attack".

        Returns:
            A tuple (attack_id, run_id). The attack_id is always present
            as a string, while run_id may be None if not provided in the response.

        Raises:
            HackAgentError: If the attack_id cannot be extracted or if the response
                indicates an error condition.
        """
        logger.debug(
            f"Attempting to extract Attack/Run IDs for '{attack_type_for_error_msg}' from response (status: {response.status_code})"
        )
        decoded_content = self._decode_response_content(response)
        parsed_data_dict = self._get_parsed_data_from_initiate_response(
            response, decoded_content, attack_type_for_error_msg
        )
        return self._extract_ids_from_data_dict(
            parsed_data_dict, attack_type_for_error_msg, decoded_content
        )
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
class AdvPrefix(AttackStrategy):
|
|
366
|
+
"""
|
|
367
|
+
Strategy implementation for AdvPrefix (Adversarial Prefix) attacks.
|
|
368
|
+
|
|
369
|
+
This strategy implements adversarial prefix generation attacks that use
|
|
370
|
+
uncensored models to generate prefixes that can elicit harmful responses
|
|
371
|
+
from target models. The attack follows a multi-stage pipeline including
|
|
372
|
+
prefix generation, cross-entropy computation, completion generation,
|
|
373
|
+
evaluation, and final selection.
|
|
374
|
+
|
|
375
|
+
The strategy integrates with the HackAgent backend to track attack
|
|
376
|
+
progress and results while executing the local AdvPrefix pipeline.
|
|
377
|
+
"""
|
|
378
|
+
|
|
379
|
+
def _prepare_and_validate_attack_params(
|
|
380
|
+
self,
|
|
381
|
+
attack_config: Dict[str, Any],
|
|
382
|
+
) -> List[Any]:
|
|
383
|
+
"""
|
|
384
|
+
Validate and extract necessary parameters from the attack configuration.
|
|
385
|
+
|
|
386
|
+
This method ensures that the attack configuration contains all required
|
|
387
|
+
parameters for the AdvPrefix attack execution.
|
|
388
|
+
|
|
389
|
+
Args:
|
|
390
|
+
attack_config: Dictionary containing attack configuration parameters.
|
|
391
|
+
Must include a 'goals' key with a list of target goals.
|
|
392
|
+
|
|
393
|
+
Returns:
|
|
394
|
+
A list of goals extracted from the attack configuration.
|
|
395
|
+
|
|
396
|
+
Raises:
|
|
397
|
+
ValueError: If the 'goals' key is missing or is not a list.
|
|
398
|
+
"""
|
|
399
|
+
goals = attack_config.get("goals")
|
|
400
|
+
if not isinstance(goals, list):
|
|
401
|
+
raise ValueError(
|
|
402
|
+
"'attack_config' must contain 'goals' list for AdvPrefixAttack."
|
|
403
|
+
)
|
|
404
|
+
|
|
405
|
+
return goals
|
|
406
|
+
|
|
407
|
+
def _create_server_attack_record(
    self,
    victim_agent_id: UUID,
    organization_id: UUID,
    attack_config: Dict[str, Any],  # Used for summary
) -> str:
    """
    Register a new 'advprefix' Attack record on the HackAgent server.

    Builds an AttackRequest from the agent, organization and configuration,
    sends it through the attack-create API call, and reads the new attack's
    ID out of the response.

    Args:
        victim_agent_id: UUID of the target agent; sent as a string.
        organization_id: UUID of the organization; sent as a string.
        attack_config: Attack configuration, sent under the
            'configuration' key of the request.

    Returns:
        The ID of the created attack record, as a string.

    Raises:
        HackAgentError: If building or sending the request fails for any
            reason, or if the attack ID cannot be extracted from the
            response.
    """
    attack_type = "advprefix"
    logger.info("Creating Attack record on the server.")

    # UUIDs are stringified before being handed to the request model.
    record_fields = dict(
        type=attack_type,
        agent=str(victim_agent_id),
        organization=str(organization_id),
        configuration=attack_config,
    )

    try:
        request_body = AttackRequest.from_dict(record_fields)
        logger.debug(
            f"Attempting to create Attack record with payload: {request_body.to_dict()}"
        )
        api_response = attacks_create_sync_detailed(
            client=self.client, body=request_body
        )
    except Exception as exc:
        # Any failure while building or sending is surfaced as HackAgentError.
        logger.error(
            f"Failed to construct/send AttackRequest for {attack_type} record: {exc}",
            exc_info=True,
        )
        raise HackAgentError(
            f"Failed to send AttackRequest for {attack_type} record: {exc}"
        ) from exc

    # Only the attack ID is needed here; the optional run ID is discarded.
    created_attack_id, _unused_run_id = (
        self.extract_attack_and_run_ids_from_initiate_response(
            response=api_response, attack_type_for_error_msg=attack_type
        )
    )
    logger.info(f"Attack record created on server. Attack ID: {created_attack_id}.")
    return created_attack_id
|
|
463
|
+
|
|
464
|
+
def _create_server_run_record(
    self,
    attack_id: str,
    victim_agent_id: str,
    run_config_override: Optional[Dict[str, Any]],
) -> str:
    """
    Create a Run record on the HackAgent server for tracking attack execution.

    The request is sent through ``run_run_tests_create.sync_detailed``. When
    the client returns no parsed object for a 201 (CREATED) response, the raw
    response body is decoded as JSON and fed to ``Run.from_dict`` as a
    fallback before the result is validated.

    Args:
        attack_id: String ID of the attack record this run belongs to.
        victim_agent_id: String ID of the target agent being attacked.
        run_config_override: Optional configuration overrides for this
            specific run instance. A falsy value is sent as an empty dict.

    Returns:
        The string ID of the created run record.

    Raises:
        HackAgentError: If the request fails, if the client raises
            ``errors.UnexpectedStatus``, or if no Run with a usable ``id``
            can be obtained from the response.
    """
    logger.info(
        f"Attempting to explicitly create a Run record for Attack ID: {attack_id}"
    )
    payload = RunRequest(
        attack=attack_id,
        agent=victim_agent_id,
        run_config=run_config_override if run_config_override else {},
    )
    try:
        # response_obj is the custom hackagent.types.Response[Run]
        response_obj = run_run_tests_create.sync_detailed(
            client=self.client, body=payload
        )

        created_run: Optional[Run] = response_obj.parsed

        # If the auto-generated client didn't parse for 201, but it's a
        # success, try manual parsing of the raw body.
        if created_run is None and response_obj.status_code == HTTPStatus.CREATED:
            logger.info(
                f"Run creation returned 201 (CREATED), attempting to manually parse response content for Attack ID: {attack_id}"
            )
            if response_obj.content:
                try:
                    created_run_data = json.loads(
                        response_obj.content.decode("utf-8")
                    )
                    created_run = Run.from_dict(created_run_data)
                    logger.info(
                        f"Manually parsed Run object from 201 response for Attack ID {attack_id}. Run ID: {created_run.id if created_run and hasattr(created_run, 'id') else 'Parse_Failed_Or_No_ID'}"
                    )
                except json.JSONDecodeError as jde:
                    logger.error(
                        f"Failed to manually parse JSON from 201 response content for Attack ID {attack_id}: {jde}. Content: {response_obj.content.decode('utf-8', errors='replace')}",
                        exc_info=True,
                    )
                    # created_run remains None, caught by the check below
                except Exception as e:
                    logger.error(
                        f"Unexpected error manually parsing 201 response content for Attack ID {attack_id}: {e}",
                        exc_info=True,
                    )
                    # created_run remains None, caught by the check below
            else:
                logger.warning(
                    f"Run creation returned 201 (CREATED) but response content was empty for Attack ID: {attack_id}. Cannot manually parse."
                )

        if not created_run or not hasattr(created_run, "id") or not created_run.id:
            status_code_val = (
                response_obj.status_code
                if hasattr(response_obj, "status_code")
                else "Unknown Status"
            )
            content_val = (
                response_obj.content.decode("utf-8", errors="replace")
                if hasattr(response_obj, "content") and response_obj.content
                else "No content"
            )

            logger.error(
                f"Failed to get valid Run ID from run creation for Attack {attack_id}. "
                f"Status: {status_code_val}, Parsed: {created_run}, Content: {content_val}"
            )
            # The attack-ID prefix is part of this message so that the text
            # callers see is the same as when this error used to be wrapped
            # by the generic handler below.
            raise HackAgentError(
                f"Failed to create Run for Attack {attack_id}: "
                f"Server API for Run creation returned status {status_code_val} "
                f"but response parsing failed, lacked Run ID, or an error occurred. Content: {content_val}"
            )

        run_id = str(created_run.id)
        logger.info(
            f"Successfully created Run ID: {run_id} for Attack ID: {attack_id}"
        )
        return run_id

    except errors.UnexpectedStatus as use:
        # Raised by the client when raise_on_unexpected_status is enabled and
        # the server answers with an undocumented status code.
        error_content = (
            use.content.decode("utf-8", errors="replace")
            if use.content
            else "No content"
        )
        logger.error(
            f"API error (UnexpectedStatus {use.status_code}) creating Run for Attack {attack_id}: {error_content}",
            exc_info=True,
        )
        raise HackAgentError(
            f"Failed to create Run for Attack {attack_id} (API status {use.status_code}): {error_content}"
        ) from use
    except HackAgentError:
        # Already logged and fully described above; do not log it a second
        # time or wrap it in another HackAgentError.
        raise
    except Exception as e:
        logger.error(
            f"Error creating Run for Attack {attack_id}: {e}", exc_info=True
        )
        raise HackAgentError(
            f"Failed to create Run for Attack {attack_id}: {e}"
        ) from e
|
|
586
|
+
|
|
587
|
+
def _prepare_attack_config(
    self,
    attack_config: Dict[str, Any],
    run_id: str,
    attack_id: str,
) -> Dict[str, Any]:
    """
    Build the flat configuration dictionary for a local AdvPrefixAttack run.

    The user's configuration is copied, then augmented with the server run
    ID, the attack ID, the API client (``self.client``) and the agent router
    (``self.hack_agent.router``). A timestamped ``output_dir`` is filled in
    when the configuration does not already provide one.

    Args:
        attack_config: Original attack configuration provided by the user.
            It must survive a JSON round-trip, which is how it is copied.
        run_id: Server-generated run ID; always replaces any ``run_id``
            already present in the configuration.
        attack_id: Server-generated attack ID for this attack instance.

    Returns:
        A new dictionary holding the copied settings plus the ``run_id``,
        ``hackagent_client``, ``agent_router``, ``attack_id`` and
        ``output_dir`` entries.
    """
    logger.debug(f"Preparing local attack config for Run ID: {run_id}")

    # A JSON round-trip yields an independent copy, so the caller's dict is
    # never mutated by the additions below.
    config_copy = json.loads(json.dumps(attack_config))

    # The server-issued run ID always wins over whatever the user supplied.
    previous_run_id = config_copy.get("run_id")
    config_copy["run_id"] = run_id
    if not previous_run_id:
        logger.info(
            f"Set 'run_id' in attack_config to server Run ID '{run_id}' for AdvPrefixAttack."
        )
    elif previous_run_id != run_id:
        logger.info(
            f"Overriding 'run_id' in attack_config from '{previous_run_id}' to server Run ID '{run_id}' for AdvPrefixAttack."
        )

    # Runtime objects and identifiers that travel alongside the settings.
    config_copy["hackagent_client"] = self.client
    config_copy["agent_router"] = self.hack_agent.router
    config_copy["attack_id"] = attack_id

    if "output_dir" in config_copy:
        return config_copy

    # No directory configured: fall back to
    # ./logs/runs/YYYY-MM-DD/HH-MM-SS_<first 8 chars of attack id>
    from datetime import datetime

    stamp = datetime.now()
    day_part = stamp.strftime("%Y-%m-%d")
    clock_part = stamp.strftime("%H-%M-%S")
    short_attack_id = attack_id[:8]
    default_dir = f"./logs/runs/{day_part}/{clock_part}_{short_attack_id}"
    config_copy["output_dir"] = default_dir
    logger.warning(
        f"'output_dir' not in attack_config for AdvPrefixAttack, defaulting to {default_dir}"
    )
    return config_copy
|
|
656
|
+
|
|
657
|
+
def _execute_local_prefix_attack(
    self,
    attack_config: Dict[str, Any],
    goals: List[Any],
    run_id: str,  # Server run_id
    attack_id: str,
    _tui_app: Optional[Any] = None,
    _tui_log_callback: Optional[Any] = None,
) -> Optional[pd.DataFrame]:
    """
    Run the AdvPrefix pipeline locally and return its results.

    The configuration is prepared with ``_prepare_attack_config``, split
    into the client, the router and the remaining settings, and handed to
    ``AdvPrefixAttack``. When both TUI arguments are supplied, a TUI log
    handler is attached to the attack instance before it runs.

    Args:
        attack_config: Attack configuration dictionary containing pipeline
            parameters.
        goals: List of target goals passed to ``AdvPrefixAttack.run``.
        run_id: Server-generated run ID for tracking this execution.
        attack_id: Server-generated attack ID for this attack instance.
        _tui_app: Optional TUI application object forwarded to
            ``attach_tui_handler``.
        _tui_log_callback: Optional callback forwarded to
            ``attach_tui_handler``.

    Returns:
        Whatever ``AdvPrefixAttack.run`` returns (annotated as a pandas
        DataFrame), or None if any exception occurs along the way.

    Note:
        Exceptions are logged and swallowed here; callers detect failure
        through the None return value.
    """
    logger.info(
        f"Executing local prefix attack for Attack ID {attack_id}, Server Run ID {run_id}."
    )
    try:
        runner_config = self._prepare_attack_config(
            attack_config, run_id, attack_id
        )

        # AdvPrefixAttack takes the client and router as direct arguments,
        # so lift them out of the flat dictionary.
        client_obj = runner_config.pop("hackagent_client")
        router_obj = runner_config.pop("agent_router")

        # Strategy-level keys that are not part of AdvPrefixAttack's config.
        for strategy_only_key in ("attack_type", "goals"):
            runner_config.pop(strategy_only_key, None)

        # What is left (user settings, run_id, attack_id, output_dir, ...)
        # is passed as the 'config' parameter.
        runner = AdvPrefixAttack(
            config=runner_config,
            client=client_obj,
            agent_router=router_obj,
        )

        if _tui_app and _tui_log_callback:
            # The TUI module is imported lazily; its absence only disables
            # log forwarding, it does not abort the attack.
            try:
                from hackagent.cli.tui.logger import attach_tui_handler

                attach_tui_handler(
                    attack_instance=runner,
                    app=_tui_app,
                    callback=_tui_log_callback,
                )
                logger.info("TUI log handler attached to attack instance")
            except ImportError:
                logger.warning(
                    "Failed to import TUI logger, logs will not be shown in TUI"
                )

        # The run ID travels inside the config, so only the goals are passed.
        outcome = runner.run(goals=goals)
        logger.info(
            f"Local prefix attack completed for Attack ID {attack_id}, Server Run ID {run_id}."
        )
        return outcome
    except Exception as e:
        logger.error(
            f"Error during local prefix attack execution for Attack ID {attack_id}, Server Run ID {run_id}: {e}",
            exc_info=True,
        )
        return None
|
|
750
|
+
|
|
751
|
+
def _log_local_run_persistence_info(
    self,
    attack_config: Dict[str, Any],
    attack_id: str,
    run_id: str,
    fail_on_run_error: bool,  # To decide if error during this info step is critical
):
    """
    Log where local run data for this attack is expected to live.

    Only logging happens here: a directory path and a CSV file hint are
    derived from ``attack_config`` and written to the log. No files are
    read or created.

    Args:
        attack_config: Attack configuration; ``output_dir`` and
            ``input_csv_for_model_persistence`` are read from it, each with
            a built-in fallback.
        attack_id: String ID of the attack record.
        run_id: String ID of the run record.
        fail_on_run_error: Accepted for interface symmetry; this method
            never raises on its basis.

    Note:
        Any exception raised while building the message is logged and
        suppressed.
    """
    try:
        # NOTE(review): this path (fallback directory plus a run_<id>
        # suffix) is computed here independently of the directory the
        # attack itself was configured with - confirm that the two match.
        fallback_dir = f"./hackagent_local_runs/{attack_id}"
        run_dir = os.path.join(
            attack_config.get("output_dir", fallback_dir), f"run_{run_id}"
        )
        csv_hint = attack_config.get(
            "input_csv_for_model_persistence", "step9_output.csv"
        )
        logger.info(
            f"Local run data (for potential Pydantic model persistence/Step10): Dir='{run_dir}', CSV hint='{csv_hint}'."
        )
    except Exception as e:
        logger.error(
            f"Error preparing local run persistence info for Attack {attack_id}: {e}",
            exc_info=True,
        )
        # fail_on_run_error is deliberately not acted upon: this step only
        # logs, so a failure here is recorded and execution continues.
|
|
798
|
+
|
|
799
|
+
def execute(
    self,
    attack_config: Dict[str, Any],
    run_config_override: Optional[Dict[str, Any]],
    fail_on_run_error: bool,
    max_wait_time_seconds: Optional[int] = None,
    poll_interval_seconds: Optional[int] = None,
    _tui_app: Optional[Any] = None,
    _tui_log_callback: Optional[Any] = None,
) -> Any:
    """
    Execute the complete AdvPrefix attack workflow.

    The workflow is:

    1. Validate that the agent/organization IDs and the attack goals are
       available (before anything is created on the server)
    2. Create an Attack record on the HackAgent server for tracking
    3. Create a Run record associated with the Attack for this execution
    4. Execute the local AdvPrefix pipeline with the target goals
    5. Log persistence information for results and intermediate data

    Args:
        attack_config: Configuration dictionary containing attack parameters.
            Must include a non-empty 'goals' entry. May include 'output_dir'
            and other AdvPrefix pipeline parameters.
        run_config_override: Optional configuration overrides for this
            specific run, forwarded to the Run record creation.
        fail_on_run_error: Whether to raise an exception if the local attack
            execution fails. If False, None is returned for failed
            executions instead.
        max_wait_time_seconds: Accepted but not used by this method.
        poll_interval_seconds: Accepted but not used by this method.
        _tui_app: Optional TUI application object forwarded to the local
            attack execution.
        _tui_log_callback: Optional TUI log callback forwarded to the local
            attack execution.

    Returns:
        The result of the local AdvPrefix execution (a pandas DataFrame on
        success). Returns None if the attack fails and fail_on_run_error is
        False.

    Raises:
        HackAgentError: If victim agent ID or organization ID is not
            available, if server record creation fails, or if local
            execution fails and fail_on_run_error is True.
        ValueError: If 'goals' is missing from, or empty in, attack_config.
            This is raised before any server record is created.
    """
    victim_agent_id: UUID = self.hack_agent.router.backend_agent.id
    organization_id: UUID = self.hack_agent.router.organization_id

    if not victim_agent_id or not organization_id:
        raise HackAgentError(
            "Victim agent ID or Organization ID is not available. Ensure agent is initialized."
        )

    # Validate the goals up front: rejecting a bad config only after the
    # Attack and Run records exist would leave orphan records on the server.
    goals = attack_config.get("goals")
    if not goals:
        raise ValueError("AdvPrefix attack requires 'goals' in attack_config.")

    # Create the Attack record on the server.
    attack_id = self._create_server_attack_record(
        victim_agent_id=victim_agent_id,
        organization_id=organization_id,
        attack_config=attack_config,  # Pass for summary or details
    )
    logger.info(f"AdvPrefix server Attack record created with ID: {attack_id}")

    # Create the Run record on the server.
    run_id = self._create_server_run_record(
        attack_id=attack_id,
        victim_agent_id=victim_agent_id,
        run_config_override=run_config_override,
    )
    logger.info(
        f"AdvPrefix server Run record created with ID: {run_id} for Attack ID: {attack_id}"
    )

    # Execute the local AdvPrefix attack logic; failures surface as None.
    local_results_df = self._execute_local_prefix_attack(
        attack_config=attack_config,
        goals=goals,
        run_id=run_id,
        attack_id=attack_id,
        _tui_app=_tui_app,
        _tui_log_callback=_tui_log_callback,
    )

    # Log where local run data is expected to be found.
    self._log_local_run_persistence_info(
        attack_config, attack_id, run_id, fail_on_run_error
    )

    if local_results_df is None and fail_on_run_error:
        raise HackAgentError(
            f"AdvPrefix local execution failed for Attack ID {attack_id} and Run ID {run_id}."
        )

    logger.info(f"AdvPrefix attack execution completed for Attack ID {attack_id}.")
    # The local execution result is the primary return value for now.
    return local_results_df
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
# --- End Strategy Pattern ---
|