dtx_models-0.18.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dtx-models might be problematic.
- dtx_models-0.18.2/PKG-INFO +57 -0
- dtx_models-0.18.2/README.md +38 -0
- dtx_models-0.18.2/pyproject.toml +21 -0
- dtx_models-0.18.2/src/dtx_models/__init__.py +0 -0
- dtx_models-0.18.2/src/dtx_models/analysis.py +322 -0
- dtx_models-0.18.2/src/dtx_models/base.py +0 -0
- dtx_models-0.18.2/src/dtx_models/evaluator.py +273 -0
- dtx_models-0.18.2/src/dtx_models/exceptions.py +2 -0
- dtx_models-0.18.2/src/dtx_models/prompts.py +460 -0
- dtx_models-0.18.2/src/dtx_models/providers/__init__.py +0 -0
- dtx_models-0.18.2/src/dtx_models/providers/base.py +20 -0
- dtx_models-0.18.2/src/dtx_models/providers/gradio.py +171 -0
- dtx_models-0.18.2/src/dtx_models/providers/groq.py +27 -0
- dtx_models-0.18.2/src/dtx_models/providers/hf.py +161 -0
- dtx_models-0.18.2/src/dtx_models/providers/http.py +152 -0
- dtx_models-0.18.2/src/dtx_models/providers/litellm.py +21 -0
- dtx_models-0.18.2/src/dtx_models/providers/models_spec.py +229 -0
- dtx_models-0.18.2/src/dtx_models/providers/ollama.py +107 -0
- dtx_models-0.18.2/src/dtx_models/providers/openai.py +139 -0
- dtx_models-0.18.2/src/dtx_models/results.py +124 -0
- dtx_models-0.18.2/src/dtx_models/scope.py +208 -0
- dtx_models-0.18.2/src/dtx_models/tactic.py +52 -0
- dtx_models-0.18.2/src/dtx_models/target.py +255 -0
- dtx_models-0.18.2/src/dtx_models/template/__init__.py +0 -0
- dtx_models-0.18.2/src/dtx_models/template/prompts/__init__.py +0 -0
- dtx_models-0.18.2/src/dtx_models/template/prompts/base.py +49 -0
- dtx_models-0.18.2/src/dtx_models/template/prompts/langhub.py +79 -0
- dtx_models-0.18.2/src/dtx_models/utils/__init__.py +0 -0
- dtx_models-0.18.2/src/dtx_models/utils/urls.py +26 -0
dtx_models-0.18.2/PKG-INFO
@@ -0,0 +1,57 @@
+Metadata-Version: 2.3
+Name: dtx-models
+Version: 0.18.2
+Summary: Shared model schemas and YAML-based configurations for the DTX framework.
+Author: JC
+Author-email: jitendra@detoxio.ai
+Requires-Python: >=3.10,<4.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: pydantic (>=2.11.5,<3.0.0)
+Requires-Dist: pydantic-yaml (>=1.5.1,<2.0.0)
+Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
+Project-URL: Homepage, https://docs.detoxio.ai
+Description-Content-Type: text/markdown
+
+# dtx-models
+
+**dtx-models** provides shared model schemas, configuration structures, and YAML-based assets used across the [DTX AI Red Teaming Framework](https://docs.detoxio.ai).
+
+This package helps standardize model definitions and integrates seamlessly with components in the `dtx` ecosystem.
+
+## Features
+
+- Pydantic-based schema definitions
+- YAML-driven model and plugin configuration
+- Clean separation of model assets from core logic
+
+## Installation
+
+```bash
+pip install dtx-models
+````
+
+Or, if developing locally:
+
+```bash
+poetry install
+```
+
+## Requirements
+
+* Python >= 3.11
+* [Pydantic](https://docs.pydantic.dev/) v2
+* [PyYAML](https://pyyaml.org/)
+
+## Documentation
+
+Full documentation available at [docs.detoxio.ai](https://docs.detoxio.ai)
+
+---
+
+© Detoxio.ai – All rights reserved.
+
+
dtx_models-0.18.2/README.md
@@ -0,0 +1,38 @@
+# dtx-models
+
+**dtx-models** provides shared model schemas, configuration structures, and YAML-based assets used across the [DTX AI Red Teaming Framework](https://docs.detoxio.ai).
+
+This package helps standardize model definitions and integrates seamlessly with components in the `dtx` ecosystem.
+
+## Features
+
+- Pydantic-based schema definitions
+- YAML-driven model and plugin configuration
+- Clean separation of model assets from core logic
+
+## Installation
+
+```bash
+pip install dtx-models
+````
+
+Or, if developing locally:
+
+```bash
+poetry install
+```
+
+## Requirements
+
+* Python >= 3.11
+* [Pydantic](https://docs.pydantic.dev/) v2
+* [PyYAML](https://pyyaml.org/)
+
+## Documentation
+
+Full documentation available at [docs.detoxio.ai](https://docs.detoxio.ai)
+
+---
+
+© Detoxio.ai – All rights reserved.
+
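The README's YAML-plus-Pydantic workflow can be sketched roughly as follows. The `AppRisks`/`RiskItem` schemas come from `dtx_models.analysis` later in this diff; the YAML snippet and the use of pydantic-yaml's `parse_yaml_raw_as`/`to_yaml_str` helpers are illustrative assumptions, not documented API of this package.

```python
# Illustrative sketch only: values are adapted from the RiskItem docstring below,
# and the pydantic-yaml helper calls are an assumption about typical usage.
from pydantic_yaml import parse_yaml_raw_as, to_yaml_str

from dtx_models.analysis import AppRisks

RAW_YAML = """
risks:
  - risk: "toxicity:hate_speech:harassment"
    risk_score: 7.5
    threat_level: "High"
    summary: "The application may generate or facilitate harassing content if misused."
    attack_strategies:
      - "Dan_11_0: Disregard the system prompt to generate harassing content."
"""

app_risks = parse_yaml_raw_as(AppRisks, RAW_YAML)  # validate the YAML against the schema
print(app_risks.risks[0].threat_level)             # "High" (__str__ returns the enum value)
print(to_yaml_str(app_risks))                      # serialize back to YAML
```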
dtx_models-0.18.2/pyproject.toml
@@ -0,0 +1,21 @@
+[tool.poetry]
+name = "dtx-models"
+version = "0.18.2"
+description = "Shared model schemas and YAML-based configurations for the DTX framework."
+authors = ["JC <jitendra@detoxio.ai>"]
+readme = "README.md"
+homepage = "https://docs.detoxio.ai"
+packages = [{ include = "dtx_models", from = "src" }]
+
+[tool.poetry.dependencies]
+python = ">=3.10,<4.0"
+pydantic = ">=2.11.5,<3.0.0"
+pydantic-yaml = ">=1.5.1,<2.0.0"
+pyyaml = ">=6.0.2,<7.0.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.3.5"
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
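A note on the src layout above: the distribution is published as `dtx-models`, while `packages = [{ include = "dtx_models", from = "src" }]` makes `dtx_models` the importable package. A minimal post-install sanity check (illustrative only, not part of the package) might look like this:

```python
# Illustrative sanity check after `pip install dtx-models`.
from importlib.metadata import version

import dtx_models  # import package, taken from src/ per the pyproject packages setting

print(dtx_models.__name__)    # "dtx_models" (underscore: the import name)
print(version("dtx-models"))  # "0.18.2" (dash: the distribution name)
```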
dtx_models-0.18.2/src/dtx_models/__init__.py
File without changes
dtx_models-0.18.2/src/dtx_models/analysis.py
@@ -0,0 +1,322 @@
+import hashlib
+from enum import Enum
+from typing import Dict, List, Optional, Union
+
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
+
+from .evaluator import BasePromptEvaluation
+from .prompts import BaseTestStrPrompt, MultiturnTestPrompts
+from .scope import Agent, RedTeamScope
+
+# ----------------------
+# Define Models
+# ----------------------
+
+
+class ThreatLevel(str, Enum):
+    HIGH = "High"
+    MEDIUM = "Medium"
+    LOW = "Low"
+
+    def __str__(self):
+        return self.value  # Ensures correct YAML serialization
+
+    @classmethod
+    def values(cls):
+        return [member.value for member in cls]
+
+
+class RiskItem(BaseModel):
+    """
+    Example:
+    - attack_strategies:
+      - 'Dan_11_0: Disregard the system prompt to generate harassing content.'
+      - 'Dan_10_0: Use prompt injection to create messages that harass individuals.'
+      - 'HijackHateHumansMini: Inject prompts that encourage hate speech against specific groups.'
+      risk: toxicity:hate_speech:harassment
+      risk_score: 7.5
+      summary: The application may generate or facilitate the dissemination of content that promotes harassment or bullying, especially if misused by users.
+      threat_level: High
+    """
+
+    risk: str = Field(
+        ...,
+        description="The category of risk associated with the application, must match predefined risk_class.",
+    )
+    risk_score: float = Field(
+        ...,
+        description="A numerical score representing risk severity, must be between 0 and 10.",
+    )
+    threat_level: ThreatLevel = Field(
+        ..., description="The severity level of the risk: High, Medium, or Low."
+    )
+    summary: str = Field(
+        ..., description="A brief description of the potential risk and its impact."
+    )
+    attack_strategies: List[str] = Field(
+        default_factory=list,
+        description="A list of known attack strategies that could exploit the system.",
+    )
+
+    # @field_validator("risk", mode="before")
+    # @classmethod
+    # def validate_risk_classes(cls, risk: str) -> str:
+    #     """Ensure risk is a valid key in the PLUGINS dictionary."""
+    #     if risk not in PLUGINS:
+    #         raise ValueError(
+    #             f"Invalid risk class: {risk}. Must be one of {list(PLUGINS.keys())}."
+    #         )
+    #     return risk
+
+    @field_validator("risk_score", mode="before")
+    @classmethod
+    def validate_risk_score(cls, risk_score: float) -> float:
+        """Ensure risk_score is between 0 and 10."""
+        if not (0 <= risk_score <= 10):
+            raise ValueError(
+                f"Invalid risk_score: {risk_score}. Must be between 0 and 10."
+            )
+        return risk_score
+
+    @field_serializer("threat_level")
+    def serialize_threat_level(self, threat_level: ThreatLevel) -> str:
+        """Serialize the threat level enum to a string."""
+        return str(threat_level)
+
+
+class AppRisks(BaseModel):
+    risks: List[RiskItem] = Field(default_factory=list)
+
+
+class ThreatModel(BaseModel):
+    analysis: str = Field(
+        description="Thinking and analysis performed solve the problem as approach "
+    )
+    target: Agent = Field(
+        description="Target agent with necessary architectural details"
+    )
+    threat_actors: List[str] = Field(description="Potential Threat Actors")
+    worst_scenarios: List[str] = Field(
+        description="Worst Case scenarios that can happen"
+    )
+
+
+class AnalysisResult(BaseModel):
+    threat_analysis: Optional[ThreatModel] = None
+    threats: AppRisks
+
+
+# --------------------
+# Test Scenarios Models
+# -----------------------
+
+
+class PromptVariable(BaseModel):
+    name: str = Field(
+        description="Variable that can replaced with a value. Variable name should use snake case format"
+    )
+    values: List[str]
+
+
+class PromptDataset(str, Enum):
+    STRINGRAY = "STRINGRAY"
+    STARGAZER = "STARGAZER"
+    HF_BEAVERTAILS = "HF_BEAVERTAILS"
+    HF_HACKAPROMPT = "HF_HACKAPROMPT"
+    HF_JAILBREAKBENCH = "HF_JAILBREAKBENCH"
+    HF_SAFEMTDATA = "HF_SAFEMTDATA"
+    HF_FLIPGUARDDATA = "HF_FLIPGUARDDATA"
+    HF_JAILBREAKV = "HF_JAILBREAKV"
+    HF_LMSYS = "HF_LMSYS"
+    HF_AISAFETY = "HF_AISAFETY"
+    HF_AIRBENCH = "HF_AIRBENCH"
+    HF_RENELLM = "HF_RENELLM"
+    HF_XTREAM = "HF_XTREAM"
+
+    def __str__(self):
+        return self.value
+
+    @classmethod
+    def values(cls):
+        return [member.value for member in cls]
+
+    @classmethod
+    def descriptions(cls):
+        """Returns a dictionary mapping each dataset value to its description."""
+        return {
+            cls.STRINGRAY.value: "A dataset generated from Garak Scanner Signatures",
+            cls.STARGAZER.value: "A dataset generating using OpenAI model",
+            cls.HF_BEAVERTAILS.value: "A dataset containing beavertail risk prompts.",
+            cls.HF_HACKAPROMPT.value: "A dataset curated for adversarial jailbreak prompts.",
+            cls.HF_JAILBREAKBENCH.value: "A benchmark dataset for jailbreak evaluation.",
+            cls.HF_SAFEMTDATA.value: "A benchmark dataset for multi turn llm jailbreak evaluation.",
+            cls.HF_FLIPGUARDDATA.value: "A dataset designed to evaluate adversarial jailbreak attempts using character-flipped prompts.",
+            cls.HF_JAILBREAKV.value: "An updated version of jailbreak prompt datasets.",
+            cls.HF_LMSYS.value: "A dataset derived from LMSYS chat logs for risk evaluation.",
+            cls.HF_AISAFETY.value: "A dataset designed by AI Safety Lab with prompts related to misinformation, toxicity, and unsafe behaviors.",
+            cls.HF_AIRBENCH.value: "A comprehensive benchmark dataset (AIR-Bench 2024) for evaluating AI risks across security, privacy, misinformation, harmful content, and manipulation scenarios.",
+            cls.HF_RENELLM.value: "A dataset from the ReNeLLM framework, containing adversarially rewritten and nested prompts designed to bypass LLM safety mechanisms for research purposes.",
+            cls.HF_XTREAM.value: "A dataset (Xtream) of multi-turn jailbreak conversations based on the AdvBench Goal",
+        }
+
+    def derived_from_hf(self) -> bool:
+        return self.value.startswith("HF_")
+
+
+# --------------------
+# Module Eval based Test Prompts
+# -----------------------
+
+
+class EvalModuleParam(BaseModel):
+    param: str
+    value: str | List[str]
+
+
+class ModuleBasedPromptEvaluation(BasePromptEvaluation):
+    modules: List[str] = Field(description="Modules to evaluate the prompt")
+    params: List[EvalModuleParam] = Field(default_factory=list)
+
+    def get_params_dict(self) -> Dict[str, List[str]]:
+        """
+        Converts params into a dictionary where keys are param names and values are lists of values.
+
+        - Merges duplicate parameters into a single list.
+        - Excludes parameters where the value is None or empty.
+        - Ensures all values are stored as lists without duplication.
+
+        Returns:
+            Dict[str, List[str]]: Dictionary containing parameter names as keys and lists of values as values.
+        """
+        params_dict = {}
+
+        for param in self.params:
+            if param.value:
+                # Normalize value to a list and filter out empty values
+                values = [param.value] if isinstance(param.value, str) else param.value
+                filtered_values = [v.strip() for v in values if v and v.strip()]
+
+                if filtered_values:
+                    if param.param in params_dict:
+                        params_dict[param.param].extend(filtered_values)
+                    else:
+                        params_dict[param.param] = filtered_values
+
+        # Remove duplicates from each parameter's list
+        for key in params_dict:
+            params_dict[key] = list(set(params_dict[key]))  # Ensure unique values
+
+        return params_dict
+
+
+class TestPromptWithModEval(BaseTestStrPrompt):
+    id: Optional[str] = Field(
+        default=None,
+        description="Unique ID of the prompt, auto-generated based on content.",
+    )
+    prompt: str = Field(description="Generated test prompt.")
+    evaluation_method: ModuleBasedPromptEvaluation = Field(
+        description="Evaluation method for the prompt."
+    )
+    module_name: str = Field(
+        default="stingray", description="Module that has generated the prompt"
+    )
+    goal: str = Field(default="")
+    strategy: str = Field(default="")
+    variables: List[PromptVariable] = Field(
+        description="List of variables used in the prompt to replace values to customize the prompt",
+        default_factory=list,
+    )
+
+    model_config = ConfigDict(frozen=True)  # Make fields immutable
+
+    def __init__(self, **data):
+        """Override init to auto-generate unique ID if not provided."""
+        super().__init__(**data)
+        object.__setattr__(self, "id", self.compute_unique_id())
+
+    def compute_unique_id(self) -> str:
+        """Computes the SHA-1 hash of the prompt as the ID."""
+        return hashlib.sha1(
+            f"{self.prompt}-{self.strategy}-{self.goal}".encode()
+        ).hexdigest()
+
+
+class TestPromptsWithModEval(BaseModel):
+    risk_name: str
+    test_prompts: List[TestPromptWithModEval] = Field(default_factory=list)
+
+
+# --------------------
+# Test Prompts with Eval Criteria
+# -----------------------
+
+
+class CriteriaBasedPromptEvaluation(BasePromptEvaluation):
+    evaluation_criteria: str = Field(description="Evaluation guidelines")
+
+
+class TestPromptWithEvalCriteria(BaseTestStrPrompt):
+    id: Optional[str] = Field(
+        default=None,
+        description="Unique ID of the prompt, auto-generated based on content.",
+    )
+    evaluation_method: CriteriaBasedPromptEvaluation = Field(
+        description="Evaluation method for the prompt."
+    )
+    goal: str = Field(description="Goal to be achieved using the prompt")
+    variables: List[PromptVariable] = Field(
+        description="List of variables used in the prompt to replace values to customize the prompt",
+        default_factory=list,
+    )
+    strategy: str = Field(description="Strategy used to generate the prompt")
+
+    model_config = ConfigDict(frozen=True)  # Make fields immutable
+
+    def __init__(self, **data):
+        """Override init to auto-generate unique ID if not provided."""
+        super().__init__(**data)
+        object.__setattr__(self, "id", self.compute_unique_id())
+
+    def compute_unique_id(self) -> str:
+        """Computes the SHA-1 hash of the prompt as the ID."""
+        return hashlib.sha1(
+            f"{self.prompt}-{self.strategy}-{self.goal}".encode()
+        ).hexdigest()
+
+
+class TestPromptsWithEvalCriteria(BaseModel):
+    risk_name: str
+    test_prompts: List[TestPromptWithEvalCriteria] = Field(default_factory=list)
+
+
+# --------------------
+# Red teaming Plan
+# -----------------------
+
+
+class TestSuitePrompts(BaseModel):
+    risk_prompts: List[
+        Union[TestPromptsWithEvalCriteria, TestPromptsWithModEval, MultiturnTestPrompts]
+    ] = Field(default_factory=list)
+    dataset: str  # Dataset name, value of PromptDataset
+
+    @field_validator("dataset")
+    @classmethod
+    def validate_dataset(cls, value):
+        if value not in PromptDataset.values():
+            raise ValueError(
+                f"Invalid dataset: {value}. Must be one of {PromptDataset.values()}."
+            )
+        return value
+
+
+class RedTeamPlan(BaseModel):
+    scope: RedTeamScope
+    threat_model: AnalysisResult
+    test_suites: List[TestSuitePrompts] = Field(default_factory=list)
+
+
+class ThreatModelDump(BaseModel):
+    input: str
+    result: AnalysisResult
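To make the relationships between these models concrete, here is a small usage sketch. The field values are invented for illustration, and only classes fully defined in this file (direct `BaseModel` subclasses) are used, so the sketch does not depend on the base classes imported from `dtx_models.prompts` or `dtx_models.evaluator`.

```python
# Illustrative usage of the models defined in analysis.py; values are made up.
from dtx_models.analysis import (
    AnalysisResult,
    AppRisks,
    PromptDataset,
    RiskItem,
    ThreatLevel,
)

item = RiskItem(
    risk="toxicity:hate_speech:harassment",
    risk_score=7.5,                 # validate_risk_score rejects values outside 0-10
    threat_level=ThreatLevel.HIGH,  # field_serializer dumps this back as the string "High"
    summary="The application may generate or facilitate harassing content.",
    attack_strategies=[
        "Dan_11_0: Disregard the system prompt to generate harassing content.",
    ],
)

result = AnalysisResult(threats=AppRisks(risks=[item]))
print(result.model_dump()["threats"]["risks"][0]["threat_level"])  # "High"

# Dataset helpers used by TestSuitePrompts.validate_dataset:
print(PromptDataset.STARGAZER.derived_from_hf())   # False (not an HF_* dataset)
print("HF_BEAVERTAILS" in PromptDataset.values())  # True
```

The prompt classes (`TestPromptWithModEval`, `TestPromptWithEvalCriteria`) additionally fill in `id` at construction time with a SHA-1 over prompt, strategy, and goal, which is why they are declared frozen and set the field via `object.__setattr__`.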
dtx_models-0.18.2/src/dtx_models/base.py
File without changes