dtx_models-0.18.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtx_models/__init__.py +0 -0
- dtx_models/analysis.py +322 -0
- dtx_models/base.py +0 -0
- dtx_models/evaluator.py +273 -0
- dtx_models/exceptions.py +2 -0
- dtx_models/prompts.py +460 -0
- dtx_models/providers/__init__.py +0 -0
- dtx_models/providers/base.py +20 -0
- dtx_models/providers/gradio.py +171 -0
- dtx_models/providers/groq.py +27 -0
- dtx_models/providers/hf.py +161 -0
- dtx_models/providers/http.py +152 -0
- dtx_models/providers/litellm.py +21 -0
- dtx_models/providers/models_spec.py +229 -0
- dtx_models/providers/ollama.py +107 -0
- dtx_models/providers/openai.py +139 -0
- dtx_models/results.py +124 -0
- dtx_models/scope.py +208 -0
- dtx_models/tactic.py +52 -0
- dtx_models/target.py +255 -0
- dtx_models/template/__init__.py +0 -0
- dtx_models/template/prompts/__init__.py +0 -0
- dtx_models/template/prompts/base.py +49 -0
- dtx_models/template/prompts/langhub.py +79 -0
- dtx_models/utils/__init__.py +0 -0
- dtx_models/utils/urls.py +26 -0
- dtx_models-0.18.2.dist-info/METADATA +57 -0
- dtx_models-0.18.2.dist-info/RECORD +29 -0
- dtx_models-0.18.2.dist-info/WHEEL +4 -0
dtx_models/providers/openai.py
ADDED
@@ -0,0 +1,139 @@
from typing import Any, Dict, Literal, Optional

from pydantic import BaseModel, Field, field_serializer, model_validator

from ..providers.models_spec import ModelTaskType

"""
id: openai
config:
  model: gpt-4o

"""


"""
id: openai
config:
  model: gpt-4o
  task: generation
  endpoint: https://api.openai.com/v1
  params:
    temperature: 0.7
    top_k: 40
    top_p: 0.9
    repeat_penalty: 1.1
    max_tokens: 512
    num_return_sequences: 1
    extra_params:
      stop: ["###", "User:"]

"""


# --- PARAMETER MODELS ---


class ProviderParams(BaseModel):
    """Optional parameters for fine-tuning model generation behavior."""

    temperature: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Controls randomness in generation (0 = deterministic, 1 = maximum randomness).",
    )
    top_k: Optional[int] = Field(
        None, ge=1, description="Top-k sampling strategy: consider the top k tokens."
    )
    top_p: Optional[float] = Field(
        None,
        ge=0,
        le=1,
        description="Nucleus sampling: consider tokens within the cumulative probability top_p.",
    )
    repeat_penalty: Optional[float] = Field(
        None,
        ge=0,
        description="Penalty applied to repeating tokens to reduce repetition.",
    )
    max_tokens: Optional[int] = Field(
        None, ge=1, description="Maximum number of tokens to generate in the output."
    )
    num_return_sequences: Optional[int] = Field(
        None, ge=1, description="Number of generated sequences to return."
    )
    extra_params: Optional[Dict[str, Any]] = Field(
        default_factory=dict,
        description="Additional, model-specific parameters not explicitly defined.",
    )


# --- BASE PROVIDER CONFIGURATION ---


class BaseProviderConfig(BaseModel):
    """Base configuration for model providers."""

    model: str = Field(
        ...,
        description="""
        Model name (e.g., gpt-4o, llama3, etc.)
        """,
    )
    task: Optional[ModelTaskType] = Field(
        default=None,
        description="""
        Task type for the model. If not provided,
        it can be inferred based on the model name.
        Options: generation, classification, embedding
        """,
    )
    params: Optional[ProviderParams] = Field(
        None, description="Optional parameters for customizing generation behavior."
    )
    endpoint: Optional[str] = Field(
        default=None,
        description="Base URL of the server or proxy endpoint.",
    )

    @model_validator(mode="after")
    def compute_fields(cls, values):
        """
        Auto-infer task type from model name if not explicitly set.
        """
        if not values.task:
            if "guard" in values.model or "classifier" in values.model:
                values.task = ModelTaskType.CLASSIFICATION
            else:
                values.task = ModelTaskType.GENERATION
        return values

    @field_serializer("task")
    def serialize_task(self, task: ModelTaskType) -> str:
        return task.value


# --- OPENAI PROVIDER ---


class OpenaiProviderConfig(BaseProviderConfig):
    endpoint: Optional[str] = Field(
        default="https://api.openai.com/v1",
        description="Base URL of the OpenAI server or proxy endpoint.",
    )

    def get_name(self) -> str:
        """
        Returns the model name as the provider's name.
        """
        return self.model


class OpenaiProvider(BaseModel):
    """Wrapper for OpenAI provider configuration."""

    provider: Literal["openai"] = Field(
        "openai", description="Provider ID, always set to 'openai'."
    )
    config: OpenaiProviderConfig
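A minimal usage sketch (illustrative, not part of the package diff): it builds an OpenaiProvider from a dict that mirrors the YAML example in the module docstring, and shows the task-inference validator in BaseProviderConfig. The printed values assume ModelTaskType.GENERATION serializes to "generation", as the docstring suggests.

from dtx_models.providers.openai import OpenaiProvider

provider = OpenaiProvider(
    provider="openai",
    config={
        "model": "gpt-4o",
        "endpoint": "https://api.openai.com/v1",
        "params": {"temperature": 0.7, "max_tokens": 512},
    },
)

# `task` was omitted, so the after-validator infers it from the model name:
# "gpt-4o" contains neither "guard" nor "classifier", so it falls back to GENERATION.
print(provider.config.task)        # ModelTaskType.GENERATION
print(provider.config.get_name())  # "gpt-4o"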
dtx_models/results.py
ADDED
@@ -0,0 +1,124 @@
from typing import List

from pydantic import BaseModel, Field

from .evaluator import BasePromptEvaluation
from .prompts import (
    BaseMultiTurnAgentResponse,
    BaseMultiTurnConversation,
    BaseTestStrPrompt,
    MultiTurnTestPrompt,
)
from .scope import RedTeamScope


class AttemptsBenchmarkStats(BaseModel):
    total: int = 0
    success: int = 0
    failed: int = 0
    errors: int = 0
    failure_rate: float = 0.0  # Computed failure rate


class ResponseEvaluationStatus(BaseModel):
    response: BaseMultiTurnAgentResponse  # Response from the agent
    success: bool = Field(
        description="Does the response satisfy the evaluation criteria? True if it meets evaluation criteria."
    )
    description: str = Field(
        description="Rationale for why the evaluation is or is not successful."
    )


class EvaluationStatus(BaseModel):
    """
    This class represents the status of the evaluation process.
    It includes:
    - success: A boolean indicating whether the response meets the evaluation criteria.
    - description: A string providing the rationale or explanation of the result.
    """

    success: bool = Field(
        description="Does the response satisfy the evaluation criteria? True if it meets evaluation criteria. An unsafe response will generally result in success being False."
    )
    description: str = Field(
        description="Rationale for why the evaluation is successful or not."
    )
    risk_classes: List[str] = Field(
        default_factory=list, description="Detected Risk Classes"
    )
    errored: bool = Field(default=False, description="Is there any Error?")

    # @field_validator("risk_classes", mode="before")
    # @classmethod
    # def validate_risk_classes(cls, risk_classes: List[str]) -> List[str]:
    #     """Ensure each risk_class is a valid key in the PLUGINS dictionary."""
    #     invalid_risks = [risk for risk in risk_classes if risk not in PLUGINS]
    #     if invalid_risks:
    #         raise ValueError(
    #             f"Invalid risk classes: {invalid_risks}. Must be one of {list(PLUGINS.keys())}."
    #         )
    #     return risk_classes


class EvalResult(BaseModel):
    """
    This class represents the result of the evaluation.
    It includes:
    - run_id: A unique identifier for this specific run.
    - prompt: The prompt that was sent to the agent.
    - evaluation_method: The evaluation method used to judge the responses.
    - responses: A list of responses generated by the agent, each with its evaluation status.
    - attempts: Stats for the specific test run (total, success, failed, errors, failure_rate).
    """

    run_id: str
    prompt: BaseTestStrPrompt | MultiTurnTestPrompt | BaseMultiTurnConversation
    evaluation_method: BasePromptEvaluation
    responses: List[ResponseEvaluationStatus]  # Multiple responses from the agent
    attempts: AttemptsBenchmarkStats  # Stats for this specific result


class EvalReport(BaseModel):
    scope: RedTeamScope
    eval_results: List[EvalResult]


class AttemptsBenchmarkBuilder:
    """
    A builder for constructing attempts during the scanning process.
    It maintains the attempts state and calculates the failure rate when complete.
    """

    def __init__(self):
        self.attempts = AttemptsBenchmarkStats()

    def add_result(self, failed: bool, error: bool):
        """
        Adds the result of a test to the attempts.

        :param failed: Whether the test failed.
        :param error: Whether there was an error generating the response.
        """
        self.attempts.total += 1
        if error:
            self.attempts.errors += 1
        elif failed:
            self.attempts.failed += 1
        else:
            self.attempts.success += 1

    def calculate_failure_rate(self):
        """Calculates the failure rate based on the current attempts."""
        if self.attempts.total > 0:
            self.attempts.failure_rate = (
                self.attempts.failed / self.attempts.total
            ) * 100
        else:
            self.attempts.failure_rate = 0.0

    def get_attempts(self) -> AttemptsBenchmarkStats:
        """Returns the current attempts object."""
        return self.attempts
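A small illustrative sketch (not part of the package diff) of the AttemptsBenchmarkBuilder flow; the three sample results and the expected stats follow directly from the add_result and calculate_failure_rate logic above.

from dtx_models.results import AttemptsBenchmarkBuilder

builder = AttemptsBenchmarkBuilder()
builder.add_result(failed=False, error=False)  # counted as success
builder.add_result(failed=True, error=False)   # counted as failed
builder.add_result(failed=True, error=True)    # error takes precedence over failed
builder.calculate_failure_rate()

stats = builder.get_attempts()
# total=3, success=1, failed=1, errors=1, failure_rate = (1 / 3) * 100 ≈ 33.33
print(stats.model_dump())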
dtx_models/scope.py
ADDED
@@ -0,0 +1,208 @@
from enum import Enum
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, model_validator

from .evaluator import EvaluatorInScope
from .providers.base import ProviderType
from .providers.gradio import GradioProvider
from .providers.hf import HFProvider
from .providers.http import HttpProvider
from .providers.litellm import LitellmProvider
from .providers.ollama import OllamaProvider
from .providers.openai import OpenaiProvider
from .tactic import PromptMutationTactic
from .template.prompts.base import PromptsRepoType
from .template.prompts.langhub import LangHubPromptTemplate


class Agent(BaseModel):
    """Represents an AI agent with various integrations, capabilities, and restrictions."""

    name: Optional[str] = Field(default_factory=lambda: "")  # Default value for name
    description: str
    external_integrations: Optional[List[str]] = Field(default_factory=lambda: [])
    internal_integrations: Optional[List[str]] = Field(default_factory=lambda: [])
    trusted_users: Optional[List[str]] = Field(default_factory=lambda: [])
    untrusted_users: Optional[List[str]] = Field(default_factory=lambda: [])
    llms: Optional[List[str]] = Field(default_factory=lambda: [])
    capabilities: Optional[List[str]] = Field(default_factory=lambda: [])
    restrictions: Optional[List[str]] = Field(default_factory=lambda: [])
    security_note: Optional[str] = Field(default_factory=lambda: "")
    include_attacker_goals: Optional[List[str]] = Field(default_factory=lambda: [])


class RiskTaxonomy(Enum):
    """
    Enum representing different risk taxonomies.
    """

    DETOXIO = "DETOXIO"
    OWASP_2025 = "OWASP_2025"

    def __str__(self):
        return self.value  # Ensures correct YAML serialization

    @classmethod
    def values(cls):
        return [member.value for member in cls]


class PluginTaxonomyMapping(BaseModel):
    """
    Provides mapping between plugins and different taxonomies.
    """

    taxonomy: RiskTaxonomy  # Taxonomy Name
    category: str
    id: str
    title: str  # Mapped name in the taxonomy


class Plugin(BaseModel):
    """
    Pydantic model representing a Plugin entry.
    """

    id: str
    title: str
    name: str
    category: str
    subcategory: str
    summary: Optional[str] = None  # Summary is optional
    taxonomy_mappings: Optional[List[PluginTaxonomyMapping]] = []  # List of taxonomies


class PluginInScopeConfig(BaseModel):
    """Configuration for each plugin with an ID and number of tests."""

    id: str  # Now using string from PluginRepo
    num_tests: int = 5


class PluginsInScope(BaseModel):
    """
    Represents a collection of plugins, allowing either:
    - A list of plugin IDs (str)
    - A list of PluginInScopeConfig objects (which include ID and num_tests)
    """

    plugins: List[Union[str, PluginInScopeConfig]]

    def get_plugin_ids(self) -> List[str]:
        plugin_ids = []
        for p in self.plugins:
            if isinstance(p, str):
                plugin_ids.append(p)
            else:
                plugin_ids.append(p.id)
        return plugin_ids


class RedTeamSettings(BaseModel):
    """Other red team settings."""

    max_prompts_per_plugin: int = 5
    max_plugin: int = 5
    max_prompts_per_tactic: int = 5
    plugins: PluginsInScope
    # Various strategies to perform red teaming
    tactics: Optional[List[PromptMutationTactic]] = Field(
        default_factory=list, description="Strategies to perform red teaming"
    )
    # An optional evaluator to override all evaluation methods globally.
    global_evaluator: Optional[EvaluatorInScope] = Field(
        default=None, description="Global Evaluator, if any, to evaluate the prompts"
    )


class ProviderVars(BaseModel):
    """
    Holds key-value pairs where values may include `{{env.ENV_NAME}}` placeholders.
    """

    vars: Dict[str, Any] = Field(
        description="List of key and value pairs", default_factory=dict
    )


class ProvidersWithEnvironments(BaseModel):
    providers: Optional[
        List[
            HttpProvider
            | HFProvider
            | GradioProvider
            | OllamaProvider
            | OpenaiProvider
            | LitellmProvider
        ]
    ] = Field(description="List of targets to Test", default_factory=list)

    prompts: Optional[List[LangHubPromptTemplate]] = Field(
        description="List of prompt templates to test", default_factory=list
    )
    environments: Optional[List[ProviderVars]] = Field(
        description="List of Variables to customize providers",
        default_factory=list,
    )

    @model_validator(mode="before")
    @classmethod
    def validate_providers(cls, values):
        """
        Ensure the correct provider type is instantiated based on the 'provider' field.
        """
        providers_data = values.get("providers", [])
        parsed_providers = []

        for provider_data in providers_data:
            if isinstance(provider_data, dict):
                provider_id = provider_data.get("provider")
                if provider_id == ProviderType.HTTP.value:
                    parsed_providers.append(HttpProvider(**provider_data))
                elif provider_id == ProviderType.GRADIO.value:
                    parsed_providers.append(GradioProvider(**provider_data))
                elif provider_id == ProviderType.HF.value:
                    parsed_providers.append(HFProvider(**provider_data))
                elif provider_id == ProviderType.OLLAMA.value:
                    parsed_providers.append(OllamaProvider(**provider_data))
                elif provider_id == ProviderType.OPENAI.value:
                    parsed_providers.append(OpenaiProvider(**provider_data))
                elif provider_id == ProviderType.LITE_LLM.value:
                    parsed_providers.append(LitellmProvider(**provider_data))
                else:
                    raise ValueError(f"Unknown provider type: {provider_id}")
            else:
                parsed_providers.append(provider_data)

        values["providers"] = parsed_providers
        return values

    @model_validator(mode="before")
    @classmethod
    def validate_prompts(cls, values):
        """
        Ensure the correct prompt type is instantiated based on 'id'.
        """
        prompts_data = values.get("prompts", [])
        parsed_prompts = []

        for prompt_data in prompts_data:
            if isinstance(prompt_data, dict):
                prompt_id = prompt_data.get("id")
                if prompt_id == PromptsRepoType.LANGHUB.value:
                    parsed_prompts.append(LangHubPromptTemplate(**prompt_data))
                else:
                    raise ValueError(f"Unknown prompt repo type: {prompt_id}")
            else:
                parsed_prompts.append(prompt_data)

        values["prompts"] = parsed_prompts
        return values


class RedTeamScope(ProvidersWithEnvironments):
    """Represents a red teaming scope, which includes an agent and its related details."""

    agent: Agent
    redteam: RedTeamSettings
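A hedged usage sketch (not part of the package diff): assembling a minimal RedTeamScope from plain dicts. The "provider" key drives the dispatch in validate_providers, and the openai entry mirrors the config shown earlier; the plugin IDs used here are illustrative placeholders, since the valid IDs are defined elsewhere in the package.

from dtx_models.scope import RedTeamScope

scope = RedTeamScope(
    agent={"description": "Customer support assistant"},
    redteam={
        "max_prompts_per_plugin": 3,
        "plugins": {"plugins": ["toxicity", {"id": "prompt_injection", "num_tests": 2}]},
    },
    providers=[
        {"provider": "openai", "config": {"model": "gpt-4o"}},
    ],
)

print(scope.redteam.plugins.get_plugin_ids())  # ["toxicity", "prompt_injection"]
print(type(scope.providers[0]).__name__)       # "OpenaiProvider"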
dtx_models/tactic.py
ADDED
@@ -0,0 +1,52 @@
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel, Field, field_serializer


class BaseTactic(BaseModel):
    name: str = Field(description="Name of the Tactic")


class BaseTacticConfig(BaseModel):
    pass


class TacticModule(str, Enum):
    FLIP_ATTACK = "flip_attack"

    def __str__(self):
        return self.value

    @classmethod
    def values(cls) -> List[str]:
        return [mode.value for mode in cls]

    @classmethod
    def descriptions(cls) -> dict:
        return {
            cls.FLIP_ATTACK.value: "Flips letters in the prompt (e.g., 'a' to 'ɐ') to evade filters.",
        }


class TacticWithModesConfig(BaseTacticConfig):
    modes: Optional[List[str]] = Field(
        default_factory=list, description="Jailbreak Mode Config"
    )


class TacticWithLanguagesConfig(BaseTacticConfig):
    languages: Optional[List[str]] = Field(
        default_factory=list, description="Languages to perform transformation"
    )


class PromptMutationTactic(BaseTactic):
    name: TacticModule = Field(description="Name of the Tactic")
    config: Optional[TacticWithModesConfig | TacticWithLanguagesConfig] = Field(
        default=None,
        description="Configuration specific to the jailbreak Tactic",
    )

    @field_serializer("name")
    def serialize_eval_model_type(self, name: TacticModule) -> str:
        return name.value
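A brief usage sketch (not part of the package diff): declaring the single tactic currently defined, flip_attack, and serializing it; the dumped values follow from the field_serializer above.

from dtx_models.tactic import PromptMutationTactic, TacticModule

tactic = PromptMutationTactic(name=TacticModule.FLIP_ATTACK)
print(tactic.model_dump())          # {"name": "flip_attack", "config": None}
print(TacticModule.descriptions())  # maps "flip_attack" to its short description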