dtx-models 0.18.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
+ from typing import Any, Dict, Literal, Optional
+
+ from pydantic import BaseModel, Field, field_serializer, model_validator
+
+ from ..providers.models_spec import ModelTaskType
+
+ """
+ id: openai
+ config:
+   model: gpt-4o
+
+ """
+
+
+ """
+ id: openai
+ config:
+   model: gpt-4o
+   task: generation
+   endpoint: https://api.openai.com/v1
+   params:
+     temperature: 0.7
+     top_k: 40
+     top_p: 0.9
+     repeat_penalty: 1.1
+     max_tokens: 512
+     num_return_sequences: 1
+     extra_params:
+       stop: ["###", "User:"]
+
+ """
+
+
+ # --- PARAMETER MODELS ---
+
+
+ class ProviderParams(BaseModel):
+     """Optional parameters for fine-tuning model generation behavior."""
+
+     temperature: Optional[float] = Field(
+         None,
+         ge=0,
+         le=1,
+         description="Controls randomness in generation (0 = deterministic, 1 = maximum randomness).",
+     )
+     top_k: Optional[int] = Field(
+         None, ge=1, description="Top-k sampling strategy: consider the top k tokens."
+     )
+     top_p: Optional[float] = Field(
+         None,
+         ge=0,
+         le=1,
+         description="Nucleus sampling: consider tokens within the cumulative probability top_p.",
+     )
+     repeat_penalty: Optional[float] = Field(
+         None,
+         ge=0,
+         description="Penalty applied to repeating tokens to reduce repetition.",
+     )
+     max_tokens: Optional[int] = Field(
+         None, ge=1, description="Maximum number of tokens to generate in the output."
+     )
+     num_return_sequences: Optional[int] = Field(
+         None, ge=1, description="Number of generated sequences to return."
+     )
+     extra_params: Optional[Dict[str, Any]] = Field(
+         default_factory=dict,
+         description="Additional, model-specific parameters not explicitly defined.",
+     )
+
+
+ # --- BASE PROVIDER CONFIGURATION ---
+
+
+ class BaseProviderConfig(BaseModel):
+     """Base configuration for model providers."""
+
+     model: str = Field(
+         ...,
+         description="""
+         Model name (e.g., gpt-4o, llama3, etc.)
+         """,
+     )
+     task: Optional[ModelTaskType] = Field(
+         default=None,
+         description="""
+         Task type for the model. If not provided,
+         it can be inferred based on the model name.
+         Options: generation, classification, embedding
+         """,
+     )
+     params: Optional[ProviderParams] = Field(
+         None, description="Optional parameters for customizing generation behavior."
+     )
+     endpoint: Optional[str] = Field(
+         default=None,
+         description="Base URL of the server or proxy endpoint.",
+     )
+
+     @model_validator(mode="after")
+     def compute_fields(cls, values):
+         """
+         Auto-infer task type from model name if not explicitly set.
+         """
+         if not values.task:
+             if "guard" in values.model or "classifier" in values.model:
+                 values.task = ModelTaskType.CLASSIFICATION
+             else:
+                 values.task = ModelTaskType.GENERATION
+         return values
+
+     @field_serializer("task")
+     def serialize_task(self, task: ModelTaskType) -> str:
+         return task.value
+
+
+ # --- OPENAI PROVIDER ---
+
+
+ class OpenaiProviderConfig(BaseProviderConfig):
+     endpoint: Optional[str] = Field(
+         default="https://api.openai.com/v1",
+         description="Base URL of the OpenAI server or proxy endpoint.",
+     )
+
+     def get_name(self) -> str:
+         """
+         Returns the model name as the provider's name.
+         """
+         return self.model
+
+
+ class OpenaiProvider(BaseModel):
+     """Wrapper for OpenAI provider configuration."""
+
+     provider: Literal["openai"] = Field(
+         "openai", description="Provider ID, always set to 'openai'."
+     )
+     config: OpenaiProviderConfig
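
For illustration, a minimal usage sketch of the provider model above (not part of the package; the import path dtx_models.providers.openai is assumed from the imports in scope.py, and the field values mirror the YAML example in the module docstring):

from dtx_models.providers.openai import OpenaiProvider  # assumed module path

# Build the provider from a dict equivalent to the YAML config example.
provider = OpenaiProvider(
    provider="openai",
    config={
        "model": "gpt-4o",
        "endpoint": "https://api.openai.com/v1",
        "params": {"temperature": 0.7, "max_tokens": 512},
    },
)

# The after-validator infers the task: no "guard"/"classifier" in the name -> generation.
print(provider.config.task)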
dtx_models/results.py ADDED
@@ -0,0 +1,124 @@
+ from typing import List
+
+ from pydantic import BaseModel, Field
+
+ from .evaluator import BasePromptEvaluation
+ from .prompts import (
+     BaseMultiTurnAgentResponse,
+     BaseMultiTurnConversation,
+     BaseTestStrPrompt,
+     MultiTurnTestPrompt,
+ )
+ from .scope import RedTeamScope
+
+
+ class AttemptsBenchmarkStats(BaseModel):
+     total: int = 0
+     success: int = 0
+     failed: int = 0
+     errors: int = 0
+     failure_rate: float = 0.0  # Computed failure rate (percentage)
+
+
+ class ResponseEvaluationStatus(BaseModel):
+     response: BaseMultiTurnAgentResponse  # Response from the agent
+     success: bool = Field(
+         description="True if the response satisfies the evaluation criteria."
+     )
+     description: str = Field(
+         description="Rationale for why the evaluation succeeded or failed."
+     )
+
+
+ class EvaluationStatus(BaseModel):
+     """
+     This class represents the status of the evaluation process.
+     It includes:
+     - success: A boolean indicating whether the response meets the evaluation criteria.
+     - description: A string providing the rationale or explanation of the result.
+     """
+
+     success: bool = Field(
+         description="True if the response satisfies the evaluation criteria. An unsafe response generally results in success being False."
+     )
+     description: str = Field(
+         description="Rationale for why the evaluation succeeded or failed."
+     )
+     risk_classes: List[str] = Field(
+         default_factory=list, description="Detected risk classes"
+     )
+     errored: bool = Field(default=False, description="Whether an error occurred.")
+
+     # @field_validator("risk_classes", mode="before")
+     # @classmethod
+     # def validate_risk_classes(cls, risk_classes: List[str]) -> List[str]:
+     #     """Ensure each risk_class is a valid key in the PLUGINS dictionary."""
+     #     invalid_risks = [risk for risk in risk_classes if risk not in PLUGINS]
+     #     if invalid_risks:
+     #         raise ValueError(
+     #             f"Invalid risk classes: {invalid_risks}. Must be one of {list(PLUGINS.keys())}."
+     #         )
+     #     return risk_classes
+
+
+ class EvalResult(BaseModel):
+     """
+     This class represents the result of the evaluation.
+     It includes:
+     - run_id: A unique identifier for this specific run.
+     - prompt: The prompt that was sent to the agent.
+     - evaluation_method: The method used to evaluate the responses.
+     - responses: The evaluation status of each response generated by the agent.
+     - attempts: Stats for this specific test run (total, success, failed, errors, failure_rate).
+     """
+
+     run_id: str
+     prompt: BaseTestStrPrompt | MultiTurnTestPrompt | BaseMultiTurnConversation
+     evaluation_method: BasePromptEvaluation
+     responses: List[ResponseEvaluationStatus]  # Multiple responses from the agent
+     attempts: AttemptsBenchmarkStats  # Stats for this specific result
+
+
+ class EvalReport(BaseModel):
+     scope: RedTeamScope
+     eval_results: List[EvalResult]
+
+
+ class AttemptsBenchmarkBuilder:
+     """
+     A builder for constructing attempt statistics during the scanning process.
+     It maintains the attempts state and calculates the failure rate when complete.
+     """
+
+     def __init__(self):
+         self.attempts = AttemptsBenchmarkStats()
+
+     def add_result(self, failed: bool, error: bool):
+         """
+         Adds the result of a test to the attempts.
+
+         :param failed: Whether the test failed.
+         :param error: Whether there was an error generating the response.
+         """
+         self.attempts.total += 1
+         if error:
+             self.attempts.errors += 1
+         elif failed:
+             self.attempts.failed += 1
+         else:
+             self.attempts.success += 1
+
+     def calculate_failure_rate(self):
+         """Calculates the failure rate based on the current attempts."""
+         if self.attempts.total > 0:
+             self.attempts.failure_rate = (
+                 self.attempts.failed / self.attempts.total
+             ) * 100
+         else:
+             self.attempts.failure_rate = 0.0
+
+     def get_attempts(self) -> AttemptsBenchmarkStats:
+         """Returns the current attempts object."""
+         return self.attempts
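
A minimal sketch of how AttemptsBenchmarkBuilder accumulates stats (the recorded outcomes are invented for illustration):

from dtx_models.results import AttemptsBenchmarkBuilder

builder = AttemptsBenchmarkBuilder()
# Record three illustrative outcomes: one pass, one failure, one error.
for failed, error in [(False, False), (True, False), (False, True)]:
    builder.add_result(failed=failed, error=error)
builder.calculate_failure_rate()

stats = builder.get_attempts()
print(stats.total, stats.success, stats.failed, stats.errors)  # 3 1 1 1
print(stats.failure_rate)  # 33.33... (failed / total * 100)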
dtx_models/scope.py ADDED
@@ -0,0 +1,208 @@
+ from enum import Enum
+ from typing import Any, Dict, List, Optional, Union
+
+ from pydantic import BaseModel, Field, model_validator
+
+ from .evaluator import EvaluatorInScope
+ from .providers.base import ProviderType
+ from .providers.gradio import GradioProvider
+ from .providers.hf import HFProvider
+ from .providers.http import HttpProvider
+ from .providers.litellm import LitellmProvider
+ from .providers.ollama import OllamaProvider
+ from .providers.openai import OpenaiProvider
+ from .tactic import PromptMutationTactic
+ from .template.prompts.base import PromptsRepoType
+ from .template.prompts.langhub import LangHubPromptTemplate
+
+
+ class Agent(BaseModel):
+     """Represents an AI agent with various integrations, capabilities, and restrictions."""
+
+     name: Optional[str] = Field(default_factory=lambda: "")  # Default value for name
+     description: str
+     external_integrations: Optional[List[str]] = Field(default_factory=lambda: [])
+     internal_integrations: Optional[List[str]] = Field(default_factory=lambda: [])
+     trusted_users: Optional[List[str]] = Field(default_factory=lambda: [])
+     untrusted_users: Optional[List[str]] = Field(default_factory=lambda: [])
+     llms: Optional[List[str]] = Field(default_factory=lambda: [])
+     capabilities: Optional[List[str]] = Field(default_factory=lambda: [])
+     restrictions: Optional[List[str]] = Field(default_factory=lambda: [])
+     security_note: Optional[str] = Field(default_factory=lambda: "")
+     include_attacker_goals: Optional[List[str]] = Field(default_factory=lambda: [])
+
+
+ class RiskTaxonomy(Enum):
+     """
+     Enum representing different risk taxonomies.
+     """
+
+     DETOXIO = "DETOXIO"
+     OWASP_2025 = "OWASP_2025"
+
+     def __str__(self):
+         return self.value  # Ensures correct YAML serialization
+
+     @classmethod
+     def values(cls):
+         return [member.value for member in cls]
+
+
+ class PluginTaxonomyMapping(BaseModel):
+     """
+     Provides mapping between plugins and different taxonomies.
+     """
+
+     taxonomy: RiskTaxonomy  # Taxonomy name
+     category: str
+     id: str
+     title: str  # Mapped name in the taxonomy
+
+
+ class Plugin(BaseModel):
+     """
+     Pydantic model representing a Plugin entry.
+     """
+
+     id: str
+     title: str
+     name: str
+     category: str
+     subcategory: str
+     summary: Optional[str] = None  # Summary is optional
+     taxonomy_mappings: Optional[List[PluginTaxonomyMapping]] = []  # List of taxonomy mappings
+
+
+ class PluginInScopeConfig(BaseModel):
+     """Configuration for each plugin with an ID and number of tests."""
+
+     id: str  # Plugin ID string from PluginRepo
+     num_tests: int = 5
+
+
+ class PluginsInScope(BaseModel):
+     """
+     Represents a collection of plugins, allowing either:
+     - A list of plugin IDs (str)
+     - A list of PluginInScopeConfig objects (which include ID and num_tests)
+     """
+
+     plugins: List[Union[str, PluginInScopeConfig]]
+
+     def get_plugin_ids(self) -> List[str]:
+         plugin_ids = []
+         for p in self.plugins:
+             if isinstance(p, str):
+                 plugin_ids.append(p)
+             else:
+                 plugin_ids.append(p.id)
+         return plugin_ids
+
+
+ class RedTeamSettings(BaseModel):
+     """Other red team settings."""
+
+     max_prompts_per_plugin: int = 5
+     max_plugin: int = 5
+     max_prompts_per_tactic: int = 5
+     plugins: PluginsInScope
+     # Various strategies to perform red teaming
+     tactics: Optional[List[PromptMutationTactic]] = Field(
+         default_factory=list, description="Strategies to perform red teaming"
+     )
+     # An optional evaluator to override all evaluation methods globally.
+     global_evaluator: Optional[EvaluatorInScope] = Field(
+         default=None, description="Global evaluator, if any, to evaluate the prompts"
+     )
+
+
+ class ProviderVars(BaseModel):
+     """
+     Holds key-value pairs where values may include `{{env.ENV_NAME}}` placeholders.
+     """
+
+     vars: Dict[str, Any] = Field(
+         description="Key-value pairs", default_factory=dict
+     )
+
+
+ class ProvidersWithEnvironments(BaseModel):
+     providers: Optional[
+         List[
+             HttpProvider
+             | HFProvider
+             | GradioProvider
+             | OllamaProvider
+             | OpenaiProvider
+             | LitellmProvider
+         ]
+     ] = Field(description="List of targets to test", default_factory=list)
+
+     prompts: Optional[List[LangHubPromptTemplate]] = Field(
+         description="List of prompt templates to test", default_factory=list
+     )
+     environments: Optional[List[ProviderVars]] = Field(
+         description="List of variables to customize providers",
+         default_factory=list,
+     )
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_providers(cls, values):
+         """
+         Ensure the correct provider type is instantiated based on the 'provider' field.
+         """
+         providers_data = values.get("providers", [])
+         parsed_providers = []
+
+         for provider_data in providers_data:
+             if isinstance(provider_data, dict):
+                 provider_id = provider_data.get("provider")
+                 if provider_id == ProviderType.HTTP.value:
+                     parsed_providers.append(HttpProvider(**provider_data))
+                 elif provider_id == ProviderType.GRADIO.value:
+                     parsed_providers.append(GradioProvider(**provider_data))
+                 elif provider_id == ProviderType.HF.value:
+                     parsed_providers.append(HFProvider(**provider_data))
+                 elif provider_id == ProviderType.OLLAMA.value:
+                     parsed_providers.append(OllamaProvider(**provider_data))
+                 elif provider_id == ProviderType.OPENAI.value:
+                     parsed_providers.append(OpenaiProvider(**provider_data))
+                 elif provider_id == ProviderType.LITE_LLM.value:
+                     parsed_providers.append(LitellmProvider(**provider_data))
+                 else:
+                     raise ValueError(f"Unknown provider type: {provider_id}")
+             else:
+                 parsed_providers.append(provider_data)
+
+         values["providers"] = parsed_providers
+         return values
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_prompts(cls, values):
+         """
+         Ensure the correct prompt type is instantiated based on 'id'.
+         """
+         prompts_data = values.get("prompts", [])
+         parsed_prompts = []
+
+         for prompt_data in prompts_data:
+             if isinstance(prompt_data, dict):
+                 prompt_id = prompt_data.get("id")
+                 if prompt_id == PromptsRepoType.LANGHUB.value:
+                     parsed_prompts.append(LangHubPromptTemplate(**prompt_data))
+                 else:
+                     raise ValueError(f"Unknown prompt repo type: {prompt_id}")
+             else:
+                 parsed_prompts.append(prompt_data)
+
+         values["prompts"] = parsed_prompts
+         return values
+
+
+ class RedTeamScope(ProvidersWithEnvironments):
+     """Represents a red teaming scope, which includes an agent and its related details."""
+
+     agent: Agent
+     redteam: RedTeamSettings
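
A minimal, hypothetical RedTeamScope construction sketch (the agent details and plugin IDs are invented for illustration; only the OpenAI provider is used because its schema appears in this diff):

from dtx_models.scope import (
    Agent,
    PluginInScopeConfig,
    PluginsInScope,
    RedTeamScope,
    RedTeamSettings,
)

scope = RedTeamScope(
    agent=Agent(name="support-bot", description="Customer support assistant"),
    # Dicts are converted to typed providers by the validate_providers validator.
    providers=[{"provider": "openai", "config": {"model": "gpt-4o"}}],
    redteam=RedTeamSettings(
        plugins=PluginsInScope(
            plugins=[
                "toxicity",  # bare plugin ID (hypothetical)
                PluginInScopeConfig(id="prompt_injection", num_tests=3),  # hypothetical ID with test count
            ]
        ),
    ),
)

print(scope.redteam.plugins.get_plugin_ids())  # ['toxicity', 'prompt_injection']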
dtx_models/tactic.py ADDED
@@ -0,0 +1,52 @@
+ from enum import Enum
+ from typing import List, Optional
+
+ from pydantic import BaseModel, Field, field_serializer
+
+
+ class BaseTactic(BaseModel):
+     name: str = Field(description="Name of the Tactic")
+
+
+ class BaseTacticConfig(BaseModel):
+     pass
+
+
+ class TacticModule(str, Enum):
+     FLIP_ATTACK = "flip_attack"
+
+     def __str__(self):
+         return self.value
+
+     @classmethod
+     def values(cls) -> List[str]:
+         return [mode.value for mode in cls]
+
+     @classmethod
+     def descriptions(cls) -> dict:
+         return {
+             cls.FLIP_ATTACK.value: "Flips letters in the prompt (e.g., 'a' to 'ɐ') to evade filters.",
+         }
+
+
+ class TacticWithModesConfig(BaseTacticConfig):
+     modes: Optional[List[str]] = Field(
+         default_factory=list, description="Jailbreak mode config"
+     )
+
+
+ class TacticWithLanguagesConfig(BaseTacticConfig):
+     languages: Optional[List[str]] = Field(
+         default_factory=list, description="Languages to perform transformation"
+     )
+
+
+ class PromptMutationTactic(BaseTactic):
+     name: TacticModule = Field(description="Name of the Tactic")
+     config: Optional[TacticWithModesConfig | TacticWithLanguagesConfig] = Field(
+         default=None,
+         description="Configuration specific to the jailbreak Tactic",
+     )
+
+     @field_serializer("name")
+     def serialize_eval_model_type(self, name: TacticModule) -> str:
+         return name.value
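
A short construction sketch for a mutation tactic (the "char" mode value is illustrative; valid mode strings are not defined in this diff):

from dtx_models.tactic import PromptMutationTactic, TacticModule, TacticWithModesConfig

tactic = PromptMutationTactic(
    name=TacticModule.FLIP_ATTACK,
    config=TacticWithModesConfig(modes=["char"]),  # hypothetical mode value
)

# The field serializer emits the enum's string value for "name".
print(tactic.model_dump())  # {'name': 'flip_attack', 'config': {'modes': ['char']}}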