hackagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hackagent/__init__.py +23 -0
- hackagent/agent.py +193 -0
- hackagent/api/__init__.py +1 -0
- hackagent/api/agent/__init__.py +1 -0
- hackagent/api/agent/agent_create.py +340 -0
- hackagent/api/agent/agent_destroy.py +136 -0
- hackagent/api/agent/agent_list.py +234 -0
- hackagent/api/agent/agent_partial_update.py +354 -0
- hackagent/api/agent/agent_retrieve.py +227 -0
- hackagent/api/agent/agent_update.py +354 -0
- hackagent/api/attack/__init__.py +1 -0
- hackagent/api/attack/attack_create.py +264 -0
- hackagent/api/attack/attack_destroy.py +140 -0
- hackagent/api/attack/attack_list.py +242 -0
- hackagent/api/attack/attack_partial_update.py +278 -0
- hackagent/api/attack/attack_retrieve.py +235 -0
- hackagent/api/attack/attack_update.py +278 -0
- hackagent/api/key/__init__.py +1 -0
- hackagent/api/key/key_create.py +168 -0
- hackagent/api/key/key_destroy.py +97 -0
- hackagent/api/key/key_list.py +158 -0
- hackagent/api/key/key_retrieve.py +150 -0
- hackagent/api/prompt/__init__.py +1 -0
- hackagent/api/prompt/prompt_create.py +160 -0
- hackagent/api/prompt/prompt_destroy.py +98 -0
- hackagent/api/prompt/prompt_list.py +173 -0
- hackagent/api/prompt/prompt_partial_update.py +174 -0
- hackagent/api/prompt/prompt_retrieve.py +151 -0
- hackagent/api/prompt/prompt_update.py +174 -0
- hackagent/api/result/__init__.py +1 -0
- hackagent/api/result/result_create.py +160 -0
- hackagent/api/result/result_destroy.py +98 -0
- hackagent/api/result/result_list.py +233 -0
- hackagent/api/result/result_partial_update.py +178 -0
- hackagent/api/result/result_retrieve.py +151 -0
- hackagent/api/result/result_trace_create.py +178 -0
- hackagent/api/result/result_update.py +174 -0
- hackagent/api/run/__init__.py +1 -0
- hackagent/api/run/run_create.py +172 -0
- hackagent/api/run/run_destroy.py +104 -0
- hackagent/api/run/run_list.py +260 -0
- hackagent/api/run/run_partial_update.py +186 -0
- hackagent/api/run/run_result_create.py +178 -0
- hackagent/api/run/run_retrieve.py +163 -0
- hackagent/api/run/run_run_tests_create.py +172 -0
- hackagent/api/run/run_update.py +186 -0
- hackagent/attacks/AdvPrefix/README.md +7 -0
- hackagent/attacks/AdvPrefix/__init__.py +0 -0
- hackagent/attacks/AdvPrefix/completer.py +438 -0
- hackagent/attacks/AdvPrefix/config.py +59 -0
- hackagent/attacks/AdvPrefix/preprocessing.py +521 -0
- hackagent/attacks/AdvPrefix/scorer.py +259 -0
- hackagent/attacks/AdvPrefix/scorer_parser.py +498 -0
- hackagent/attacks/AdvPrefix/selector.py +246 -0
- hackagent/attacks/AdvPrefix/step1_generate.py +324 -0
- hackagent/attacks/AdvPrefix/step4_compute_ce.py +293 -0
- hackagent/attacks/AdvPrefix/step6_get_completions.py +387 -0
- hackagent/attacks/AdvPrefix/step7_evaluate_responses.py +289 -0
- hackagent/attacks/AdvPrefix/step8_aggregate_evaluations.py +177 -0
- hackagent/attacks/AdvPrefix/step9_select_prefixes.py +59 -0
- hackagent/attacks/AdvPrefix/utils.py +192 -0
- hackagent/attacks/__init__.py +6 -0
- hackagent/attacks/advprefix.py +1136 -0
- hackagent/attacks/base.py +50 -0
- hackagent/attacks/strategies.py +539 -0
- hackagent/branding.py +143 -0
- hackagent/client.py +328 -0
- hackagent/errors.py +31 -0
- hackagent/logger.py +67 -0
- hackagent/models/__init__.py +71 -0
- hackagent/models/agent.py +240 -0
- hackagent/models/agent_request.py +169 -0
- hackagent/models/agent_type_enum.py +12 -0
- hackagent/models/attack.py +154 -0
- hackagent/models/attack_request.py +82 -0
- hackagent/models/evaluation_status_enum.py +14 -0
- hackagent/models/organization_minimal.py +68 -0
- hackagent/models/paginated_agent_list.py +123 -0
- hackagent/models/paginated_attack_list.py +123 -0
- hackagent/models/paginated_prompt_list.py +123 -0
- hackagent/models/paginated_result_list.py +123 -0
- hackagent/models/paginated_run_list.py +123 -0
- hackagent/models/paginated_user_api_key_list.py +123 -0
- hackagent/models/patched_agent_request.py +176 -0
- hackagent/models/patched_attack_request.py +92 -0
- hackagent/models/patched_prompt_request.py +162 -0
- hackagent/models/patched_result_request.py +237 -0
- hackagent/models/patched_run_request.py +138 -0
- hackagent/models/prompt.py +226 -0
- hackagent/models/prompt_request.py +155 -0
- hackagent/models/result.py +294 -0
- hackagent/models/result_list_evaluation_status.py +14 -0
- hackagent/models/result_request.py +232 -0
- hackagent/models/run.py +233 -0
- hackagent/models/run_list_status.py +12 -0
- hackagent/models/run_request.py +133 -0
- hackagent/models/status_enum.py +12 -0
- hackagent/models/step_type_enum.py +14 -0
- hackagent/models/trace.py +121 -0
- hackagent/models/trace_request.py +94 -0
- hackagent/models/user_api_key.py +201 -0
- hackagent/models/user_api_key_request.py +73 -0
- hackagent/models/user_profile_minimal.py +76 -0
- hackagent/py.typed +1 -0
- hackagent/router/__init__.py +11 -0
- hackagent/router/adapters/__init__.py +5 -0
- hackagent/router/adapters/google_adk.py +658 -0
- hackagent/router/adapters/litellm_adapter.py +290 -0
- hackagent/router/base.py +48 -0
- hackagent/router/router.py +753 -0
- hackagent/types.py +46 -0
- hackagent/utils.py +61 -0
- hackagent/vulnerabilities/__init__.py +0 -0
- hackagent-0.1.0.dist-info/LICENSE +202 -0
- hackagent-0.1.0.dist-info/METADATA +173 -0
- hackagent-0.1.0.dist-info/RECORD +117 -0
- hackagent-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,1136 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prefix generation pipeline attack based on the BaseAttack class.
|
|
3
|
+
|
|
4
|
+
This module implements a complete pipeline for generating, filtering, and selecting prefixes
|
|
5
|
+
using uncensored and target language models, adapted as an attack module.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import logging
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from typing import List, Dict, Any, Optional
|
|
12
|
+
import copy
|
|
13
|
+
from uuid import UUID
|
|
14
|
+
import json
|
|
15
|
+
|
|
16
|
+
from hackagent.client import AuthenticatedClient # Keep for type hinting
|
|
17
|
+
from hackagent.router.router import AgentRouter # For type hinting agent_router
|
|
18
|
+
from .base import BaseAttack
|
|
19
|
+
|
|
20
|
+
# Import step execution functions
|
|
21
|
+
from .AdvPrefix import step1_generate
|
|
22
|
+
from .AdvPrefix import step4_compute_ce
|
|
23
|
+
from .AdvPrefix import step6_get_completions
|
|
24
|
+
from .AdvPrefix import step7_evaluate_responses
|
|
25
|
+
from .AdvPrefix import step8_aggregate_evaluations
|
|
26
|
+
from .AdvPrefix import step9_select_prefixes
|
|
27
|
+
from .AdvPrefix.preprocessing import PrefixPreprocessor, PreprocessConfig
|
|
28
|
+
from .AdvPrefix.utils import (
|
|
29
|
+
execute_processor_step,
|
|
30
|
+
) # New import from hackagent.utils
|
|
31
|
+
|
|
32
|
+
# Models and API clients for backend interaction
|
|
33
|
+
from hackagent.models import (
|
|
34
|
+
ResultRequest,
|
|
35
|
+
TraceRequest,
|
|
36
|
+
PatchedRunRequest, # Assuming this exists for PATCH /api/runs/{id}/
|
|
37
|
+
PatchedResultRequest, # Added for updating Result evaluation_status
|
|
38
|
+
StatusEnum,
|
|
39
|
+
StepTypeEnum,
|
|
40
|
+
Result as BackendResult, # Alias to avoid conflict
|
|
41
|
+
EvaluationStatusEnum, # Potentially for parent Result
|
|
42
|
+
)
|
|
43
|
+
from hackagent.types import UNSET
|
|
44
|
+
from hackagent.api.run import run_result_create
|
|
45
|
+
from hackagent.api.result import result_trace_create
|
|
46
|
+
from hackagent.api.result import result_partial_update # Added for updating Result
|
|
47
|
+
from hackagent.api.run import run_partial_update
|
|
48
|
+
from hackagent.attacks.AdvPrefix.config import DEFAULT_PREFIX_GENERATION_CONFIG
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Helper function for deep merging dictionaries
|
|
52
|
+
def _recursive_update(target_dict, source_dict):
    """
    Deep-merge ``source_dict`` into ``target_dict``, mutating the target in place.

    When a key holds a dictionary on both sides, the two dictionaries are merged
    key by key. In every other case the target entry is replaced by a deep copy
    of the source value, so the target never shares mutable objects with the
    source. Nothing is returned.
    """
    for name, incoming in source_dict.items():
        existing = target_dict.get(name)
        both_are_dicts = isinstance(incoming, dict) and isinstance(existing, dict)
        if not both_are_dicts:
            # Leaf value, new key, or type mismatch: the source value wins outright.
            target_dict[name] = copy.deepcopy(incoming)
            continue
        # Nested dictionaries on both sides: descend instead of replacing.
        _recursive_update(existing, incoming)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class AdvPrefixAttack(BaseAttack):
|
|
68
|
+
"""
|
|
69
|
+
Attack class implementing the prefix generation pipeline by orchestrating step modules.
|
|
70
|
+
|
|
71
|
+
Inherits from BaseAttack and adapts the multi-step prefix generation process.
|
|
72
|
+
Expects configuration as a standard Python dictionary.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
def __init__(
    self,
    config: Optional[Dict[str, Any]] = None,
    client: Optional[AuthenticatedClient] = None,
    agent_router: Optional[AgentRouter] = None,
):
    """
    Initialize the pipeline with configuration.

    Args:
        config: Optional dictionary of overrides. It is deep-merged onto a
            copy of DEFAULT_PREFIX_GENERATION_CONFIG; the caller's dictionary
            is not modified.
        client: AuthenticatedClient instance passed from the strategy.
            Required even though the parameter defaults to None.
        agent_router: AgentRouter instance for the victim agent, passed from
            the strategy. Required even though the parameter defaults to None.

    Raises:
        ValueError: If ``client`` or ``agent_router`` is None, or if the merged
            configuration has no (or an empty) 'output_dir'.
    """
    if client is None:
        raise ValueError("AuthenticatedClient must be provided to AdvPrefixAttack.")
    if agent_router is None:
        raise ValueError(
            "Victim AgentRouter instance must be provided to AdvPrefixAttack."
        )
    self.client = client
    self.agent_router = agent_router

    # Start with a deep copy of the defaults to prevent any modification to the
    # shared module-level default configuration.
    current_config = copy.deepcopy(DEFAULT_PREFIX_GENERATION_CONFIG)

    if config:  # config is the user-provided sparse dictionary of overrides
        _recursive_update(current_config, config)

    # run_id, run_dir and the logger are assigned BEFORE super().__init__():
    # according to the original note in this method, the base initializer
    # calls _setup(), which reads all three.
    self.run_id = current_config.get("run_id")
    output_dir = current_config.get("output_dir")
    if not output_dir:
        raise ValueError("Configuration missing required key: 'output_dir'")
    if self.run_id is None:
        # No run ID is known yet (run() may still supply one). os.path.join
        # raises TypeError on a None component, so use the output directory
        # itself until a run ID is available.
        self.run_dir = output_dir
    else:
        # NOTE(review): run() rebuilds run_dir as output_dir/run_<id> when it
        # overrides the ID, while this uses output_dir/<id> — confirm which
        # layout is intended.
        self.run_dir = os.path.join(output_dir, str(self.run_id))
    # Make sure the key exists in the config handed to BaseAttack, even if None.
    current_config["run_id"] = self.run_id

    self.logger = logging.getLogger(__name__)

    # Hand BaseAttack its own top-level copy of the merged configuration.
    base_config = current_config.copy()

    super().__init__(base_config)

    # Initialize components needed across steps (like the Preprocessor).
    # Failures are logged rather than raised; self.preprocessor then stays None
    # and run() checks for that before the steps that need it.
    self.preprocessor = None
    try:
        # Only these keys are forwarded to PreprocessConfig; absent ones are
        # skipped so that PreprocessConfig can apply its own defaults.
        preprocess_cfg_keys = (
            "min_char_length",
            "max_ce",
            "max_token_segments",
            "n_candidates_per_goal",
        )
        # self.config is presumably set by BaseAttack.__init__ — verify there.
        preprocess_cfg_dict = {
            key: self.config[key] for key in preprocess_cfg_keys if key in self.config
        }

        self.logger.info(
            f"Initializing Preprocessor with derived config: {preprocess_cfg_dict}"
        )
        preprocessor_config_obj = PreprocessConfig(**preprocess_cfg_dict)
        self.preprocessor = PrefixPreprocessor(config=preprocessor_config_obj)

        self.logger.info("Preprocessor initialized successfully.")

    except KeyError as ke:
        self.logger.error(
            f"Missing required key for PreprocessConfig: {ke}", exc_info=True
        )
    except ImportError:
        self.logger.error(
            "Failed to import AutoTokenizer, PreprocessConfig or PrefixPreprocessor. Steps requiring it will fail.",
            exc_info=True,
        )
    except Exception as e:
        self.logger.error(
            f"Failed to initialize PrefixPreprocessor: {e}", exc_info=True
        )
        # self.preprocessor remains None
|
|
170
|
+
|
|
171
|
+
def _validate_config(self):
    """
    Check that self.config carries every key the pipeline steps read.

    Runs the base-class validation first, then verifies that all required keys
    are present and that the list-valued options really are lists.

    Raises:
        ValueError: If one or more required keys are absent; the message lists
            them in declaration order.
        TypeError: If 'meta_prefixes', 'judges' or 'selection_judges' is not a list.
    """
    super()._validate_config()

    # Required keys grouped by the stage that consumes them. 'input_csv' is not
    # listed because goals are passed to run(); 'max_ce' is optional.
    required_by_stage = (
        ("general", ("output_dir", "start_step")),
        (
            "preprocessor",
            ("min_char_length", "max_token_segments", "n_candidates_per_goal"),
        ),
        (
            "step 1",
            (
                "meta_prefixes",
                "meta_prefix_samples",
                "batch_size",
                "max_new_tokens",
                "guided_topk",
                "temperature",
            ),
        ),
        ("step 4", ("surrogate_attack_prompt",)),
        ("step 6", ("max_new_tokens_completion", "n_samples")),
        (
            "step 7",
            ("judges", "batch_size_judge", "max_new_tokens_eval", "filter_len"),
        ),
        ("step 9", ("pasr_weight", "n_prefixes_per_goal", "selection_judges")),
    )
    absent = [
        key
        for _stage, stage_keys in required_by_stage
        for key in stage_keys
        if key not in self.config
    ]
    if absent:
        raise ValueError(
            f"Configuration dictionary missing required keys: {', '.join(absent)}"
        )

    # These options are iterated by later steps and therefore must be lists.
    for list_key in ("meta_prefixes", "judges", "selection_judges"):
        if not isinstance(self.config.get(list_key), list):
            raise TypeError(f"Config key '{list_key}' must be a list.")
|
|
225
|
+
|
|
226
|
+
def _setup(self):
    """
    Set up logging and record the run's identity and configuration.

    The configuration is logged without any entry whose key contains "token",
    "key" or "secret" (case-insensitive). The match is a plain substring test,
    so harmless keys such as 'max_new_tokens' are omitted from the log as well.
    """
    self._setup_logging()
    log = self.logger
    log.info(f"AdvPrefixAttack initialized with run ID: {self.run_id}")
    log.info(f"Output directory: {self.run_dir}")

    # Build a redacted view of the config so credentials never reach the log.
    sensitive_markers = ("token", "key", "secret")
    log_config = {}
    for name, value in self.config.items():
        lowered = name.lower()
        if any(marker in lowered for marker in sensitive_markers):
            continue
        log_config[name] = value
    log.info(f"Configuration (non-sensitive): {log_config}")
|
|
244
|
+
|
|
245
|
+
def _setup_logging(self):
    """
    Configure logging to both file and console for this attack instance.

    Creates self.run_dir if needed, removes and closes every handler currently
    attached to self.logger, then attaches a file handler appending to
    ``<run_dir>/pipeline.log`` and a console handler. Propagation to ancestor
    loggers is disabled and the level is set to INFO. The method can be called
    again (for example after run_dir changes) without stacking handlers.

    If the log file cannot be opened the error is printed and logging
    continues on the console only.
    """
    os.makedirs(self.run_dir, exist_ok=True)
    log_file = os.path.join(self.run_dir, "pipeline.log")

    # Prevent duplicate logs if the root logger is configured as well.
    self.logger.propagate = False
    self.logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # Remove existing handlers if re-initializing (e.g., if run_id changes).
    # Iterate over a copy because removeHandler mutates the list.
    for handler in self.logger.handlers[:]:
        self.logger.removeHandler(handler)
        handler.close()

    # File handler (append mode so repeated runs share one log file).
    try:
        fh = logging.FileHandler(log_file, mode="a")
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)
    except Exception as e:
        # The logger itself may be unusable at this point, so fall back to print.
        print(f"Error setting up file handler for attack log: {e}")

    # Console handler. FileHandler is a subclass of StreamHandler, so it must
    # be excluded explicitly; otherwise the file handler added above would be
    # mistaken for a console handler and console output would never be set up.
    has_console_handler = any(
        isinstance(h, logging.StreamHandler)
        and not isinstance(h, logging.FileHandler)
        for h in self.logger.handlers
    )
    if not has_console_handler:
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        self.logger.addHandler(ch)
|
|
279
|
+
|
|
280
|
+
# NOTE: helper methods that used to live on this class have moved to the step files or utils.
|
|
281
|
+
# Methods like _get_checkpoint_path and _clear_gpu_memory are now in utils
|
|
282
|
+
# Methods related to specific steps (_generate_prefixes, _construct_prompts, etc.) are in step files
|
|
283
|
+
|
|
284
|
+
async def run(
|
|
285
|
+
self, goals: List[str], initial_run_id: str | None = None
|
|
286
|
+
) -> pd.DataFrame:
|
|
287
|
+
"""
|
|
288
|
+
Execute the complete prefix generation pipeline by calling step modules.
|
|
289
|
+
|
|
290
|
+
Args:
|
|
291
|
+
goals: A list of goal strings to generate prefixes for.
|
|
292
|
+
initial_run_id: Optional run ID to use; otherwise, use the one from init or generate.
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
A pandas DataFrame containing the final selected prefixes, or the result
|
|
296
|
+
of the last successfully completed step if the pipeline stops early or fails.
|
|
297
|
+
"""
|
|
298
|
+
parent_result_id: Optional[str] = (
|
|
299
|
+
None # Will store the ID of the main Result object for this run
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
# Override run_id if provided
|
|
303
|
+
if initial_run_id and initial_run_id != self.run_id:
|
|
304
|
+
self.logger.info(
|
|
305
|
+
f"Overriding run ID from '{self.run_id}' to '{initial_run_id}'"
|
|
306
|
+
)
|
|
307
|
+
self.run_id = initial_run_id
|
|
308
|
+
# Update run_dir based on the new run_id
|
|
309
|
+
# Ensure config output_dir exists and is a string
|
|
310
|
+
output_dir = self.config.get("output_dir")
|
|
311
|
+
if not output_dir or not isinstance(output_dir, str):
|
|
312
|
+
self.logger.error(
|
|
313
|
+
f"Invalid or missing 'output_dir' in config: {output_dir}. Cannot update run_dir."
|
|
314
|
+
)
|
|
315
|
+
# Handle error appropriately, e.g., raise or use a default, or stop
|
|
316
|
+
# For now, we'll let it potentially fail later if run_dir is essential and not set
|
|
317
|
+
else:
|
|
318
|
+
self.run_dir = os.path.join(output_dir, f"run_{self.run_id}")
|
|
319
|
+
self._setup_logging() # Re-run logging setup with potentially new run_dir
|
|
320
|
+
|
|
321
|
+
if not self.run_id:
|
|
322
|
+
self.logger.error(
|
|
323
|
+
"Run ID is not set. Cannot proceed with backend interaction."
|
|
324
|
+
)
|
|
325
|
+
# Fallback to original behavior without backend interaction if run_id is crucial and missing.
|
|
326
|
+
# This part would need to be robustly handled based on application requirements.
|
|
327
|
+
# For now, we proceed, and API calls will likely fail or be skipped.
|
|
328
|
+
pass
|
|
329
|
+
|
|
330
|
+
self.logger.info(
|
|
331
|
+
f"Starting Prefix Generation Attack pipeline for Run ID {self.run_id} with {len(goals)} goals."
|
|
332
|
+
)
|
|
333
|
+
results_df = None # Final results (output of step 9)
|
|
334
|
+
last_step_output_df = pd.DataFrame() # Holds output of the most recent step
|
|
335
|
+
|
|
336
|
+
pipeline_failed = False
|
|
337
|
+
final_step_reached = 0 # Track the last step attempted
|
|
338
|
+
current_run_status = StatusEnum.RUNNING # Initial status
|
|
339
|
+
|
|
340
|
+
# Attempt to create a parent Result for this Run
|
|
341
|
+
if self.run_id and run_result_create:
|
|
342
|
+
try:
|
|
343
|
+
self.logger.info(
|
|
344
|
+
f"Attempting to create parent Result for Run ID: {self.run_id}"
|
|
345
|
+
)
|
|
346
|
+
result_request_body = ResultRequest(
|
|
347
|
+
run=self.run_id,
|
|
348
|
+
prompt=None, # No specific prompt for parent result
|
|
349
|
+
request_payload={}, # No request payload for parent
|
|
350
|
+
response_body="Parent result for prefix generation attack.",
|
|
351
|
+
evaluation_status=EvaluationStatusEnum.NOT_EVALUATED,
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
parent_result_response = await run_result_create.asyncio_detailed(
|
|
355
|
+
client=self.client,
|
|
356
|
+
id=UUID(self.run_id), # This is the run_pk
|
|
357
|
+
body=result_request_body,
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
created_parent_result: Optional[BackendResult] = None
|
|
361
|
+
successful_creation = False
|
|
362
|
+
|
|
363
|
+
if 200 <= parent_result_response.status_code < 300:
|
|
364
|
+
if parent_result_response.parsed:
|
|
365
|
+
created_parent_result = parent_result_response.parsed
|
|
366
|
+
successful_creation = True
|
|
367
|
+
elif (
|
|
368
|
+
parent_result_response.status_code == 201
|
|
369
|
+
and parent_result_response.content
|
|
370
|
+
):
|
|
371
|
+
try:
|
|
372
|
+
created_parent_result_data = json.loads(
|
|
373
|
+
parent_result_response.content.decode("utf-8")
|
|
374
|
+
)
|
|
375
|
+
created_parent_result = BackendResult.from_dict(
|
|
376
|
+
created_parent_result_data
|
|
377
|
+
)
|
|
378
|
+
successful_creation = True
|
|
379
|
+
self.logger.info(
|
|
380
|
+
f"Manually parsed parent Result from 201 response for Run ID {self.run_id}"
|
|
381
|
+
)
|
|
382
|
+
except Exception as e_parse:
|
|
383
|
+
self.logger.error(
|
|
384
|
+
f"Failed to manually parse parent Result content for Run ID {self.run_id} despite 201 status. Parse Error: {e_parse}, Body: {parent_result_response.content}",
|
|
385
|
+
exc_info=True,
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
if not successful_creation or not created_parent_result:
|
|
389
|
+
self.logger.error(
|
|
390
|
+
f"Failed to create or parse parent Result for Run ID {self.run_id}. Status: {parent_result_response.status_code}, Parsed: {bool(parent_result_response.parsed)}, Body: {parent_result_response.content}"
|
|
391
|
+
)
|
|
392
|
+
else:
|
|
393
|
+
if (
|
|
394
|
+
hasattr(created_parent_result, "id")
|
|
395
|
+
and created_parent_result.id is not None
|
|
396
|
+
):
|
|
397
|
+
parent_result_id = str(created_parent_result.id)
|
|
398
|
+
self.logger.info(
|
|
399
|
+
f"Successfully created parent Result with ID: {parent_result_id} for Run ID {self.run_id}"
|
|
400
|
+
)
|
|
401
|
+
else:
|
|
402
|
+
self.logger.error(
|
|
403
|
+
f"Parent Result created/parsed for Run ID {self.run_id}, but ID is missing or None. Result Data: {created_parent_result}"
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
except Exception as e:
|
|
407
|
+
self.logger.error(
|
|
408
|
+
f"Error creating parent Result for Run ID {self.run_id}: {e}",
|
|
409
|
+
exc_info=True,
|
|
410
|
+
)
|
|
411
|
+
else:
|
|
412
|
+
if not self.run_id:
|
|
413
|
+
self.logger.warning(
|
|
414
|
+
"Run ID not available, skipping parent Result creation."
|
|
415
|
+
)
|
|
416
|
+
if not run_result_create:
|
|
417
|
+
self.logger.warning(
|
|
418
|
+
"`run_result_create` API function not available, skipping parent Result creation."
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
try:
|
|
422
|
+
start_step = self.config.get("start_step", 1)
|
|
423
|
+
self.logger.info(f"Pipeline configured to start at step {start_step}.")
|
|
424
|
+
|
|
425
|
+
# Step 1: Generate Prefixes
|
|
426
|
+
if start_step <= 1:
|
|
427
|
+
final_step_reached = 1
|
|
428
|
+
self.logger.info("--- Running Step 1: Generate Prefixes ---")
|
|
429
|
+
try:
|
|
430
|
+
unique_goals = list(dict.fromkeys(goals)) if goals else []
|
|
431
|
+
# Await the call to step1_generate.execute
|
|
432
|
+
last_step_output_df = await step1_generate.execute(
|
|
433
|
+
goals=unique_goals,
|
|
434
|
+
config=self.config,
|
|
435
|
+
logger=self.logger,
|
|
436
|
+
run_dir=self.run_dir,
|
|
437
|
+
client=self.client,
|
|
438
|
+
)
|
|
439
|
+
results_df = last_step_output_df
|
|
440
|
+
if last_step_output_df is None or last_step_output_df.empty:
|
|
441
|
+
self.logger.warning(
|
|
442
|
+
"Step 1 returned empty or None DataFrame. Stopping pipeline."
|
|
443
|
+
)
|
|
444
|
+
pipeline_failed = True
|
|
445
|
+
current_run_status = StatusEnum.FAILED
|
|
446
|
+
raise StopIteration("Step 1 failed or produced no output.")
|
|
447
|
+
except Exception as e:
|
|
448
|
+
self.logger.error(f"Step 1 execution failed: {e}", exc_info=True)
|
|
449
|
+
pipeline_failed = True
|
|
450
|
+
current_run_status = StatusEnum.FAILED
|
|
451
|
+
raise StopIteration(f"Step 1 failed: {e}")
|
|
452
|
+
finally:
|
|
453
|
+
if parent_result_id and result_trace_create:
|
|
454
|
+
try:
|
|
455
|
+
content_json = (
|
|
456
|
+
last_step_output_df.to_json(
|
|
457
|
+
orient="records", default_handler=str
|
|
458
|
+
)
|
|
459
|
+
if last_step_output_df is not None
|
|
460
|
+
and not last_step_output_df.empty
|
|
461
|
+
else "{}"
|
|
462
|
+
)
|
|
463
|
+
trace_request_body = TraceRequest(
|
|
464
|
+
sequence=final_step_reached,
|
|
465
|
+
step_type=StepTypeEnum.OTHER,
|
|
466
|
+
content={
|
|
467
|
+
"step_name": "Step 1: Generate Prefixes",
|
|
468
|
+
"data_json": content_json,
|
|
469
|
+
"status": (
|
|
470
|
+
"Failed" if pipeline_failed else "Completed"
|
|
471
|
+
),
|
|
472
|
+
},
|
|
473
|
+
)
|
|
474
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
475
|
+
client=self.client,
|
|
476
|
+
id=UUID(parent_result_id),
|
|
477
|
+
body=trace_request_body,
|
|
478
|
+
)
|
|
479
|
+
if not (200 <= trace_response.status_code < 300):
|
|
480
|
+
self.logger.error(
|
|
481
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
482
|
+
)
|
|
483
|
+
else:
|
|
484
|
+
self.logger.info(
|
|
485
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
486
|
+
)
|
|
487
|
+
except Exception as te:
|
|
488
|
+
self.logger.error(
|
|
489
|
+
f"Error creating Trace for Step 1: {te}", exc_info=True
|
|
490
|
+
)
|
|
491
|
+
elif not result_trace_create and parent_result_id:
|
|
492
|
+
self.logger.warning(
|
|
493
|
+
f"`result_trace_create` API function not available, skipping Trace creation for Step {final_step_reached}."
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
# Step 2: Filter Phase 1
|
|
497
|
+
if start_step <= 2 and not pipeline_failed:
|
|
498
|
+
final_step_reached = 2
|
|
499
|
+
self.logger.info("--- Running Step 2: Filter Phase 1 ---")
|
|
500
|
+
if self.preprocessor is None:
|
|
501
|
+
self.logger.error(
|
|
502
|
+
"Preprocessor not initialized, cannot run Step 2."
|
|
503
|
+
)
|
|
504
|
+
pipeline_failed = True
|
|
505
|
+
current_run_status = StatusEnum.FAILED
|
|
506
|
+
raise StopIteration("Step 2 failed: Preprocessor missing.")
|
|
507
|
+
# Assuming execute_processor_step is synchronous
|
|
508
|
+
last_step_output_df = execute_processor_step(
|
|
509
|
+
input_df=last_step_output_df,
|
|
510
|
+
logger=self.logger,
|
|
511
|
+
run_dir=self.run_dir,
|
|
512
|
+
processor_instance=self.preprocessor,
|
|
513
|
+
processor_method_name="filter_phase1",
|
|
514
|
+
step_number=2,
|
|
515
|
+
step_name_for_logging="Initial prefix filtering (Phase 1)",
|
|
516
|
+
log_success_details_template="{count} prefixes remaining after phase 1 filtering.",
|
|
517
|
+
)
|
|
518
|
+
if last_step_output_df is None:
|
|
519
|
+
pipeline_failed = True
|
|
520
|
+
current_run_status = StatusEnum.FAILED
|
|
521
|
+
raise StopIteration("Step 2 failed critically (returned None).")
|
|
522
|
+
if parent_result_id and result_trace_create:
|
|
523
|
+
try:
|
|
524
|
+
content_json = (
|
|
525
|
+
last_step_output_df.to_json(
|
|
526
|
+
orient="records", default_handler=str
|
|
527
|
+
)
|
|
528
|
+
if last_step_output_df is not None
|
|
529
|
+
and not last_step_output_df.empty
|
|
530
|
+
else "{}"
|
|
531
|
+
)
|
|
532
|
+
trace_request_body = TraceRequest(
|
|
533
|
+
sequence=final_step_reached,
|
|
534
|
+
step_type=StepTypeEnum.OTHER,
|
|
535
|
+
content={
|
|
536
|
+
"step_name": "Step 2: Filter Phase 1",
|
|
537
|
+
"data_json": content_json,
|
|
538
|
+
"status": "Completed",
|
|
539
|
+
},
|
|
540
|
+
)
|
|
541
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
542
|
+
client=self.client,
|
|
543
|
+
id=UUID(parent_result_id),
|
|
544
|
+
body=trace_request_body,
|
|
545
|
+
)
|
|
546
|
+
if not (200 <= trace_response.status_code < 300):
|
|
547
|
+
self.logger.error(
|
|
548
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
549
|
+
)
|
|
550
|
+
else:
|
|
551
|
+
self.logger.info(
|
|
552
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
553
|
+
)
|
|
554
|
+
except Exception as te:
|
|
555
|
+
self.logger.error(
|
|
556
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
557
|
+
exc_info=True,
|
|
558
|
+
)
|
|
559
|
+
elif not result_trace_create and parent_result_id:
|
|
560
|
+
self.logger.warning(
|
|
561
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
562
|
+
)
|
|
563
|
+
|
|
564
|
+
# Step 3: Ablate Prefixes
|
|
565
|
+
if start_step <= 3 and not pipeline_failed:
|
|
566
|
+
final_step_reached = 3
|
|
567
|
+
self.logger.info("--- Running Step 3: Ablate Prefixes ---")
|
|
568
|
+
if self.preprocessor is None:
|
|
569
|
+
self.logger.error(
|
|
570
|
+
"Preprocessor not initialized, cannot run Step 3."
|
|
571
|
+
)
|
|
572
|
+
pipeline_failed = True
|
|
573
|
+
current_run_status = StatusEnum.FAILED
|
|
574
|
+
raise StopIteration("Step 3 failed: Preprocessor missing.")
|
|
575
|
+
# Assuming execute_processor_step is synchronous
|
|
576
|
+
last_step_output_df = execute_processor_step(
|
|
577
|
+
input_df=last_step_output_df,
|
|
578
|
+
logger=self.logger,
|
|
579
|
+
run_dir=self.run_dir,
|
|
580
|
+
processor_instance=self.preprocessor,
|
|
581
|
+
processor_method_name="ablate",
|
|
582
|
+
step_number=3,
|
|
583
|
+
step_name_for_logging="Prefix ablation",
|
|
584
|
+
log_success_details_template="{count} ablated prefixes created.",
|
|
585
|
+
)
|
|
586
|
+
if last_step_output_df is None:
|
|
587
|
+
pipeline_failed = True
|
|
588
|
+
current_run_status = StatusEnum.FAILED
|
|
589
|
+
raise StopIteration("Step 3 failed critically (returned None).")
|
|
590
|
+
if parent_result_id and result_trace_create:
|
|
591
|
+
try:
|
|
592
|
+
content_json = (
|
|
593
|
+
last_step_output_df.to_json(
|
|
594
|
+
orient="records", default_handler=str
|
|
595
|
+
)
|
|
596
|
+
if last_step_output_df is not None
|
|
597
|
+
and not last_step_output_df.empty
|
|
598
|
+
else "{}"
|
|
599
|
+
)
|
|
600
|
+
trace_request_body = TraceRequest(
|
|
601
|
+
sequence=final_step_reached,
|
|
602
|
+
step_type=StepTypeEnum.OTHER,
|
|
603
|
+
content={
|
|
604
|
+
"step_name": "Step 3: Ablate Prefixes",
|
|
605
|
+
"data_json": content_json,
|
|
606
|
+
"status": "Completed",
|
|
607
|
+
},
|
|
608
|
+
)
|
|
609
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
610
|
+
client=self.client,
|
|
611
|
+
id=UUID(parent_result_id),
|
|
612
|
+
body=trace_request_body,
|
|
613
|
+
)
|
|
614
|
+
if not (200 <= trace_response.status_code < 300):
|
|
615
|
+
self.logger.error(
|
|
616
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
617
|
+
)
|
|
618
|
+
else:
|
|
619
|
+
self.logger.info(
|
|
620
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
621
|
+
)
|
|
622
|
+
except Exception as te:
|
|
623
|
+
self.logger.error(
|
|
624
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
625
|
+
exc_info=True,
|
|
626
|
+
)
|
|
627
|
+
elif not result_trace_create and parent_result_id:
|
|
628
|
+
self.logger.warning(
|
|
629
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
# Step 4: Compute Cross-Entropy
|
|
633
|
+
# Note: step4_compute_ce.execute itself was called with asyncio.run before.
|
|
634
|
+
# If step4_compute_ce.execute is an async function, it should be awaited directly.
|
|
635
|
+
# If it's synchronous but internally uses asyncio.run, that might need its own refactor.
|
|
636
|
+
# For now, it is awaited directly below on the assumption that it is an async function.
|
|
637
|
+
# The original code was `asyncio.run(step4_compute_ce.execute(...))`.
|
|
638
|
+
# This implies step4_compute_ce.execute is itself an async function.
|
|
639
|
+
if start_step <= 4 and not pipeline_failed:
|
|
640
|
+
final_step_reached = 4
|
|
641
|
+
self.logger.info("--- Running Step 4: Compute Cross-Entropy ---")
|
|
642
|
+
try:
|
|
643
|
+
# If step4_compute_ce.execute is async, it should be awaited.
|
|
644
|
+
last_step_output_df = await step4_compute_ce.execute(
|
|
645
|
+
input_df=last_step_output_df,
|
|
646
|
+
config=self.config,
|
|
647
|
+
logger=self.logger,
|
|
648
|
+
run_dir=self.run_dir,
|
|
649
|
+
client=self.client, # client might be used by step4 for its own async calls
|
|
650
|
+
agent_router=self.agent_router,
|
|
651
|
+
)
|
|
652
|
+
results_df = last_step_output_df
|
|
653
|
+
if last_step_output_df is None:
|
|
654
|
+
pipeline_failed = True
|
|
655
|
+
current_run_status = StatusEnum.FAILED
|
|
656
|
+
raise StopIteration("Step 4 failed critically.")
|
|
657
|
+
except Exception as e:
|
|
658
|
+
self.logger.error(f"Step 4 execution failed: {e}", exc_info=True)
|
|
659
|
+
pipeline_failed = True
|
|
660
|
+
current_run_status = StatusEnum.FAILED
|
|
661
|
+
raise StopIteration(f"Step 4 failed: {e}")
|
|
662
|
+
finally:
|
|
663
|
+
if parent_result_id and result_trace_create:
|
|
664
|
+
try:
|
|
665
|
+
content_json = (
|
|
666
|
+
last_step_output_df.to_json(
|
|
667
|
+
orient="records", default_handler=str
|
|
668
|
+
)
|
|
669
|
+
if last_step_output_df is not None
|
|
670
|
+
and not last_step_output_df.empty
|
|
671
|
+
else "{}"
|
|
672
|
+
)
|
|
673
|
+
trace_request_body = TraceRequest(
|
|
674
|
+
sequence=final_step_reached,
|
|
675
|
+
step_type=StepTypeEnum.OTHER,
|
|
676
|
+
content={
|
|
677
|
+
"step_name": "Step 4: Compute Cross-Entropy",
|
|
678
|
+
"data_json": content_json,
|
|
679
|
+
"status": (
|
|
680
|
+
"Failed"
|
|
681
|
+
if pipeline_failed and start_step <= 4
|
|
682
|
+
else "Completed"
|
|
683
|
+
),
|
|
684
|
+
},
|
|
685
|
+
)
|
|
686
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
687
|
+
client=self.client,
|
|
688
|
+
id=UUID(parent_result_id),
|
|
689
|
+
body=trace_request_body,
|
|
690
|
+
)
|
|
691
|
+
if not (200 <= trace_response.status_code < 300):
|
|
692
|
+
self.logger.error(
|
|
693
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
694
|
+
)
|
|
695
|
+
else:
|
|
696
|
+
self.logger.info(
|
|
697
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
698
|
+
)
|
|
699
|
+
except Exception as te:
|
|
700
|
+
self.logger.error(
|
|
701
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
702
|
+
exc_info=True,
|
|
703
|
+
)
|
|
704
|
+
elif not result_trace_create and parent_result_id:
|
|
705
|
+
self.logger.warning(
|
|
706
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
707
|
+
)
|
|
708
|
+
|
|
709
|
+
# Step 5: Filter Phase 2 (CE-based)
|
|
710
|
+
if start_step <= 5 and not pipeline_failed:
|
|
711
|
+
final_step_reached = 5
|
|
712
|
+
self.logger.info("--- Running Step 5: Filter Phase 2 (CE-based) ---")
|
|
713
|
+
if self.preprocessor is None:
|
|
714
|
+
self.logger.error(
|
|
715
|
+
"Preprocessor not initialized, cannot run Step 5."
|
|
716
|
+
)
|
|
717
|
+
pipeline_failed = True
|
|
718
|
+
current_run_status = StatusEnum.FAILED
|
|
719
|
+
raise StopIteration("Step 5 failed: Preprocessor missing.")
|
|
720
|
+
# Assuming execute_processor_step is synchronous
|
|
721
|
+
last_step_output_df = execute_processor_step(
|
|
722
|
+
input_df=last_step_output_df,
|
|
723
|
+
logger=self.logger,
|
|
724
|
+
run_dir=self.run_dir,
|
|
725
|
+
processor_instance=self.preprocessor,
|
|
726
|
+
processor_method_name="filter_phase2",
|
|
727
|
+
step_number=5,
|
|
728
|
+
step_name_for_logging="CE-based filtering (Phase 2)",
|
|
729
|
+
log_success_details_template="{count} prefixes remaining after phase 2 filtering.",
|
|
730
|
+
)
|
|
731
|
+
if last_step_output_df is None:
|
|
732
|
+
pipeline_failed = True
|
|
733
|
+
current_run_status = StatusEnum.FAILED
|
|
734
|
+
raise StopIteration("Step 5 failed critically (returned None).")
|
|
735
|
+
if parent_result_id and result_trace_create:
|
|
736
|
+
try:
|
|
737
|
+
content_json = (
|
|
738
|
+
last_step_output_df.to_json(
|
|
739
|
+
orient="records", default_handler=str
|
|
740
|
+
)
|
|
741
|
+
if last_step_output_df is not None
|
|
742
|
+
and not last_step_output_df.empty
|
|
743
|
+
else "{}"
|
|
744
|
+
)
|
|
745
|
+
trace_request_body = TraceRequest(
|
|
746
|
+
sequence=final_step_reached,
|
|
747
|
+
step_type=StepTypeEnum.OTHER,
|
|
748
|
+
content={
|
|
749
|
+
"step_name": "Step 5: Filter Phase 2 (CE-based)",
|
|
750
|
+
"data_json": content_json,
|
|
751
|
+
"status": "Completed",
|
|
752
|
+
},
|
|
753
|
+
)
|
|
754
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
755
|
+
client=self.client,
|
|
756
|
+
id=UUID(parent_result_id),
|
|
757
|
+
body=trace_request_body,
|
|
758
|
+
)
|
|
759
|
+
if not (200 <= trace_response.status_code < 300):
|
|
760
|
+
self.logger.error(
|
|
761
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
762
|
+
)
|
|
763
|
+
else:
|
|
764
|
+
self.logger.info(
|
|
765
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
766
|
+
)
|
|
767
|
+
except Exception as te:
|
|
768
|
+
self.logger.error(
|
|
769
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
770
|
+
exc_info=True,
|
|
771
|
+
)
|
|
772
|
+
elif not result_trace_create and parent_result_id:
|
|
773
|
+
self.logger.warning(
|
|
774
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
# Step 6: Get Completions
|
|
778
|
+
# step6_get_completions.execute is awaited below, so it is expected to be an async function.
|
|
779
|
+
if start_step <= 6 and not pipeline_failed:
|
|
780
|
+
final_step_reached = 6
|
|
781
|
+
self.logger.info("--- Running Step 6: Get Completions ---")
|
|
782
|
+
# Await the call to step6_get_completions.execute
|
|
783
|
+
last_step_output_df = await step6_get_completions.execute(
|
|
784
|
+
agent_router=self.agent_router,
|
|
785
|
+
input_df=last_step_output_df,
|
|
786
|
+
config=self.config,
|
|
787
|
+
logger=self.logger,
|
|
788
|
+
run_dir=self.run_dir,
|
|
789
|
+
)
|
|
790
|
+
if last_step_output_df is None:
|
|
791
|
+
pipeline_failed = True
|
|
792
|
+
current_run_status = StatusEnum.FAILED
|
|
793
|
+
raise StopIteration("Step 6 failed critically.")
|
|
794
|
+
if parent_result_id and result_trace_create:
|
|
795
|
+
try:
|
|
796
|
+
content_json = (
|
|
797
|
+
last_step_output_df.to_json(
|
|
798
|
+
orient="records", default_handler=str
|
|
799
|
+
)
|
|
800
|
+
if last_step_output_df is not None
|
|
801
|
+
and not last_step_output_df.empty
|
|
802
|
+
else "{}"
|
|
803
|
+
)
|
|
804
|
+
trace_request_body = TraceRequest(
|
|
805
|
+
sequence=final_step_reached,
|
|
806
|
+
step_type=StepTypeEnum.OTHER,
|
|
807
|
+
content={
|
|
808
|
+
"step_name": "Step 6: Get Completions",
|
|
809
|
+
"data_json": content_json,
|
|
810
|
+
"status": "Completed",
|
|
811
|
+
},
|
|
812
|
+
)
|
|
813
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
814
|
+
client=self.client,
|
|
815
|
+
id=UUID(parent_result_id),
|
|
816
|
+
body=trace_request_body,
|
|
817
|
+
)
|
|
818
|
+
if not (200 <= trace_response.status_code < 300):
|
|
819
|
+
self.logger.error(
|
|
820
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
821
|
+
)
|
|
822
|
+
else:
|
|
823
|
+
self.logger.info(
|
|
824
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
825
|
+
)
|
|
826
|
+
except Exception as te:
|
|
827
|
+
self.logger.error(
|
|
828
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
829
|
+
exc_info=True,
|
|
830
|
+
)
|
|
831
|
+
elif not result_trace_create and parent_result_id:
|
|
832
|
+
self.logger.warning(
|
|
833
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
834
|
+
)
|
|
835
|
+
|
|
836
|
+
# Step 7: Evaluate Responses
|
|
837
|
+
# Assuming step7_evaluate_responses.execute is synchronous
|
|
838
|
+
if start_step <= 7 and not pipeline_failed:
|
|
839
|
+
final_step_reached = 7
|
|
840
|
+
self.logger.info("--- Running Step 7: Evaluate Responses ---")
|
|
841
|
+
last_step_output_df = step7_evaluate_responses.execute(
|
|
842
|
+
input_df=last_step_output_df,
|
|
843
|
+
config=self.config,
|
|
844
|
+
logger=self.logger,
|
|
845
|
+
run_dir=self.run_dir,
|
|
846
|
+
)
|
|
847
|
+
if last_step_output_df is None:
|
|
848
|
+
pipeline_failed = True
|
|
849
|
+
current_run_status = StatusEnum.FAILED
|
|
850
|
+
raise StopIteration("Step 7 failed critically.")
|
|
851
|
+
if parent_result_id and result_trace_create:
|
|
852
|
+
try:
|
|
853
|
+
content_json = (
|
|
854
|
+
last_step_output_df.to_json(
|
|
855
|
+
orient="records", default_handler=str
|
|
856
|
+
)
|
|
857
|
+
if last_step_output_df is not None
|
|
858
|
+
and not last_step_output_df.empty
|
|
859
|
+
else "{}"
|
|
860
|
+
)
|
|
861
|
+
trace_request_body = TraceRequest(
|
|
862
|
+
sequence=final_step_reached,
|
|
863
|
+
step_type=StepTypeEnum.OTHER,
|
|
864
|
+
content={
|
|
865
|
+
"step_name": "Step 7: Evaluate Responses",
|
|
866
|
+
"data_json": content_json,
|
|
867
|
+
"status": "Completed",
|
|
868
|
+
},
|
|
869
|
+
)
|
|
870
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
871
|
+
client=self.client,
|
|
872
|
+
id=UUID(parent_result_id),
|
|
873
|
+
body=trace_request_body,
|
|
874
|
+
)
|
|
875
|
+
if not (200 <= trace_response.status_code < 300):
|
|
876
|
+
self.logger.error(
|
|
877
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
878
|
+
)
|
|
879
|
+
else:
|
|
880
|
+
self.logger.info(
|
|
881
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
882
|
+
)
|
|
883
|
+
except Exception as te:
|
|
884
|
+
self.logger.error(
|
|
885
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
886
|
+
exc_info=True,
|
|
887
|
+
)
|
|
888
|
+
elif not result_trace_create and parent_result_id:
|
|
889
|
+
self.logger.warning(
|
|
890
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
891
|
+
)
|
|
892
|
+
|
|
893
|
+
# Step 8: Aggregate Evaluations
|
|
894
|
+
if start_step <= 8 and not pipeline_failed:
|
|
895
|
+
final_step_reached = 8
|
|
896
|
+
self.logger.info("--- Running Step 8: Aggregate Evaluations ---")
|
|
897
|
+
last_step_output_df = step8_aggregate_evaluations.execute(
|
|
898
|
+
input_df=last_step_output_df,
|
|
899
|
+
config=self.config,
|
|
900
|
+
run_dir=self.run_dir,
|
|
901
|
+
)
|
|
902
|
+
if last_step_output_df is None:
|
|
903
|
+
pipeline_failed = True
|
|
904
|
+
current_run_status = StatusEnum.FAILED
|
|
905
|
+
raise StopIteration("Step 8 failed critically.")
|
|
906
|
+
if parent_result_id and result_trace_create:
|
|
907
|
+
try:
|
|
908
|
+
content_json = (
|
|
909
|
+
last_step_output_df.to_json(
|
|
910
|
+
orient="records", default_handler=str
|
|
911
|
+
)
|
|
912
|
+
if last_step_output_df is not None
|
|
913
|
+
and not last_step_output_df.empty
|
|
914
|
+
else "{}"
|
|
915
|
+
)
|
|
916
|
+
trace_request_body = TraceRequest(
|
|
917
|
+
sequence=final_step_reached,
|
|
918
|
+
step_type=StepTypeEnum.OTHER,
|
|
919
|
+
content={
|
|
920
|
+
"step_name": "Step 8: Aggregate Evaluations",
|
|
921
|
+
"data_json": content_json,
|
|
922
|
+
"status": "Completed",
|
|
923
|
+
},
|
|
924
|
+
)
|
|
925
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
926
|
+
client=self.client,
|
|
927
|
+
id=UUID(parent_result_id),
|
|
928
|
+
body=trace_request_body,
|
|
929
|
+
)
|
|
930
|
+
if not (200 <= trace_response.status_code < 300):
|
|
931
|
+
self.logger.error(
|
|
932
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
933
|
+
)
|
|
934
|
+
else:
|
|
935
|
+
self.logger.info(
|
|
936
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
937
|
+
)
|
|
938
|
+
except Exception as te:
|
|
939
|
+
self.logger.error(
|
|
940
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
941
|
+
exc_info=True,
|
|
942
|
+
)
|
|
943
|
+
elif not result_trace_create and parent_result_id:
|
|
944
|
+
self.logger.warning(
|
|
945
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
946
|
+
)
|
|
947
|
+
|
|
948
|
+
# Step 9: Select Prefixes
|
|
949
|
+
if start_step <= 9 and not pipeline_failed:
|
|
950
|
+
final_step_reached = 9
|
|
951
|
+
self.logger.info("--- Running Step 9: Select Prefixes ---")
|
|
952
|
+
results_df = step9_select_prefixes.execute(
|
|
953
|
+
input_df=last_step_output_df,
|
|
954
|
+
config=self.config,
|
|
955
|
+
run_dir=self.run_dir,
|
|
956
|
+
)
|
|
957
|
+
if results_df is None:
|
|
958
|
+
pipeline_failed = True
|
|
959
|
+
current_run_status = StatusEnum.FAILED
|
|
960
|
+
raise StopIteration("Step 9 failed critically.")
|
|
961
|
+
last_step_output_df = results_df
|
|
962
|
+
if parent_result_id and result_trace_create:
|
|
963
|
+
try:
|
|
964
|
+
content_json = (
|
|
965
|
+
results_df.to_json(orient="records", default_handler=str)
|
|
966
|
+
if results_df is not None and not results_df.empty
|
|
967
|
+
else "{}"
|
|
968
|
+
)
|
|
969
|
+
trace_request_body = TraceRequest(
|
|
970
|
+
sequence=final_step_reached,
|
|
971
|
+
step_type=StepTypeEnum.OTHER,
|
|
972
|
+
content={
|
|
973
|
+
"step_name": "Step 9: Select Prefixes",
|
|
974
|
+
"data_json": content_json,
|
|
975
|
+
"status": "Completed",
|
|
976
|
+
},
|
|
977
|
+
)
|
|
978
|
+
trace_response = await result_trace_create.asyncio_detailed(
|
|
979
|
+
client=self.client,
|
|
980
|
+
id=UUID(parent_result_id),
|
|
981
|
+
body=trace_request_body,
|
|
982
|
+
)
|
|
983
|
+
if not (200 <= trace_response.status_code < 300):
|
|
984
|
+
self.logger.error(
|
|
985
|
+
f"Failed to create Trace for Result {parent_result_id}, Step {final_step_reached}. Status: {trace_response.status_code}, Body: {trace_response.content}"
|
|
986
|
+
)
|
|
987
|
+
else:
|
|
988
|
+
self.logger.info(
|
|
989
|
+
f"Successfully created Trace for Result {parent_result_id}, Step {final_step_reached}."
|
|
990
|
+
)
|
|
991
|
+
except Exception as te:
|
|
992
|
+
self.logger.error(
|
|
993
|
+
f"Error creating Trace for Step {final_step_reached}: {te}",
|
|
994
|
+
exc_info=True,
|
|
995
|
+
)
|
|
996
|
+
elif not result_trace_create and parent_result_id:
|
|
997
|
+
self.logger.warning(
|
|
998
|
+
f"`result_trace_create` API not available, skipping Trace for Step {final_step_reached}."
|
|
999
|
+
)
|
|
1000
|
+
|
|
1001
|
+
if pipeline_failed:
|
|
1002
|
+
self.logger.error(
|
|
1003
|
+
f"Pipeline marked as failed after step {final_step_reached}."
|
|
1004
|
+
)
|
|
1005
|
+
current_run_status = StatusEnum.FAILED
|
|
1006
|
+
elif final_step_reached == 0:
|
|
1007
|
+
self.logger.warning(
|
|
1008
|
+
"Pipeline did not execute any steps based on start_step config."
|
|
1009
|
+
)
|
|
1010
|
+
current_run_status = StatusEnum.COMPLETED
|
|
1011
|
+
elif results_df is not None:
|
|
1012
|
+
self.logger.info(
|
|
1013
|
+
"Prefix Generation Attack pipeline finished successfully at Step 9."
|
|
1014
|
+
)
|
|
1015
|
+
current_run_status = StatusEnum.COMPLETED
|
|
1016
|
+
return results_df
|
|
1017
|
+
else:
|
|
1018
|
+
self.logger.warning(
|
|
1019
|
+
f"Pipeline finished after step {final_step_reached}. Returning intermediate results."
|
|
1020
|
+
)
|
|
1021
|
+
current_run_status = StatusEnum.COMPLETED
|
|
1022
|
+
|
|
1023
|
+
return (
|
|
1024
|
+
last_step_output_df
|
|
1025
|
+
if last_step_output_df is not None
|
|
1026
|
+
else pd.DataFrame()
|
|
1027
|
+
)
|
|
1028
|
+
|
|
1029
|
+
except StopIteration as stop_e:
|
|
1030
|
+
self.logger.error(f"Pipeline execution stopped: {stop_e}")
|
|
1031
|
+
current_run_status = StatusEnum.FAILED
|
|
1032
|
+
except Exception as e:
|
|
1033
|
+
self.logger.error(
|
|
1034
|
+
f"Pipeline orchestration failed unexpectedly: {str(e)}", exc_info=True
|
|
1035
|
+
)
|
|
1036
|
+
pipeline_failed = True
|
|
1037
|
+
current_run_status = StatusEnum.FAILED
|
|
1038
|
+
|
|
1039
|
+
if self.run_id and run_partial_update:
|
|
1040
|
+
try:
|
|
1041
|
+
self.logger.info(
|
|
1042
|
+
f"Attempting to update Run {self.run_id} status to {current_run_status.value}"
|
|
1043
|
+
)
|
|
1044
|
+
patched_run_body = PatchedRunRequest(
|
|
1045
|
+
status=current_run_status,
|
|
1046
|
+
run_notes=UNSET,
|
|
1047
|
+
run_config=UNSET,
|
|
1048
|
+
agent=UNSET,
|
|
1049
|
+
attack=UNSET,
|
|
1050
|
+
)
|
|
1051
|
+
update_response = await run_partial_update.asyncio_detailed(
|
|
1052
|
+
client=self.client, id=UUID(self.run_id), body=patched_run_body
|
|
1053
|
+
)
|
|
1054
|
+
if not (200 <= update_response.status_code < 300):
|
|
1055
|
+
self.logger.error(
|
|
1056
|
+
f"Failed to update Run {self.run_id} status. Status: {update_response.status_code}, Body: {update_response.content}"
|
|
1057
|
+
)
|
|
1058
|
+
else:
|
|
1059
|
+
self.logger.info(
|
|
1060
|
+
f"Successfully updated Run {self.run_id} status to {current_run_status.value}"
|
|
1061
|
+
)
|
|
1062
|
+
except Exception as e:
|
|
1063
|
+
self.logger.error(
|
|
1064
|
+
f"Error updating Run {self.run_id} status: {e}", exc_info=True
|
|
1065
|
+
)
|
|
1066
|
+
else:
|
|
1067
|
+
if not self.run_id:
|
|
1068
|
+
self.logger.warning(
|
|
1069
|
+
"Run ID not available, skipping final Run status update."
|
|
1070
|
+
)
|
|
1071
|
+
if not run_partial_update:
|
|
1072
|
+
self.logger.warning(
|
|
1073
|
+
"`run_partial_update` API function not available, skipping final Run status update."
|
|
1074
|
+
)
|
|
1075
|
+
|
|
1076
|
+
# Update the parent Result's evaluation_status
|
|
1077
|
+
if parent_result_id and result_partial_update:
|
|
1078
|
+
try:
|
|
1079
|
+
final_eval_status = (
|
|
1080
|
+
EvaluationStatusEnum.SUCCESSFUL_JAILBREAK
|
|
1081
|
+
if not pipeline_failed
|
|
1082
|
+
and final_step_reached >= self.config.get("end_step", 9)
|
|
1083
|
+
else EvaluationStatusEnum.ERROR_TEST_FRAMEWORK
|
|
1084
|
+
)
|
|
1085
|
+
# If pipeline_failed was true due to an exception, ERROR_TEST_FRAMEWORK is appropriate.
|
|
1086
|
+
|
|
1087
|
+
self.logger.info(
|
|
1088
|
+
f"Attempting to update parent Result ID {parent_result_id} to evaluation_status: {final_eval_status.value}"
|
|
1089
|
+
)
|
|
1090
|
+
|
|
1091
|
+
# Assuming PatchedResultRequest is the correct model and takes evaluation_status
|
|
1092
|
+
patched_result_request_body = PatchedResultRequest(
|
|
1093
|
+
evaluation_status=final_eval_status
|
|
1094
|
+
)
|
|
1095
|
+
|
|
1096
|
+
result_update_response = await result_partial_update.asyncio_detailed(
|
|
1097
|
+
client=self.client,
|
|
1098
|
+
id=UUID(parent_result_id), # The ID of the Result to update
|
|
1099
|
+
body=patched_result_request_body,
|
|
1100
|
+
)
|
|
1101
|
+
|
|
1102
|
+
if 200 <= result_update_response.status_code < 300:
|
|
1103
|
+
self.logger.info(
|
|
1104
|
+
f"Successfully updated parent Result ID {parent_result_id} evaluation_status to {final_eval_status.value}."
|
|
1105
|
+
)
|
|
1106
|
+
else:
|
|
1107
|
+
self.logger.error(
|
|
1108
|
+
f"Failed to update parent Result ID {parent_result_id} evaluation_status. Server responded with {result_update_response.status_code}. Body: {result_update_response.content}"
|
|
1109
|
+
)
|
|
1110
|
+
except Exception as e_result_update:
|
|
1111
|
+
self.logger.error(
|
|
1112
|
+
f"Error updating evaluation_status for parent Result ID {parent_result_id}: {e_result_update}",
|
|
1113
|
+
exc_info=True,
|
|
1114
|
+
)
|
|
1115
|
+
elif not parent_result_id:
|
|
1116
|
+
self.logger.warning(
|
|
1117
|
+
"Parent Result ID not available, skipping evaluation_status update for parent Result."
|
|
1118
|
+
)
|
|
1119
|
+
elif not result_partial_update:
|
|
1120
|
+
self.logger.warning(
|
|
1121
|
+
"`result_partial_update` API not available, skipping evaluation_status update for parent Result."
|
|
1122
|
+
)
|
|
1123
|
+
|
|
1124
|
+
if pipeline_failed:
|
|
1125
|
+
self.logger.warning(
|
|
1126
|
+
f"Returning output from last successful step ({final_step_reached}) due to failure."
|
|
1127
|
+
)
|
|
1128
|
+
elif final_step_reached < start_step and start_step > 1:
|
|
1129
|
+
self.logger.warning(
|
|
1130
|
+
f"Pipeline did not run any steps (start_step={start_step}). Returning empty DataFrame."
|
|
1131
|
+
)
|
|
1132
|
+
return pd.DataFrame()
|
|
1133
|
+
|
|
1134
|
+
return (
|
|
1135
|
+
last_step_output_df if last_step_output_df is not None else pd.DataFrame()
|
|
1136
|
+
)
|