dtx-models 0.18.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dtx-models might be problematic. Click here for more details.

Files changed (29) hide show
  1. dtx_models-0.18.2/PKG-INFO +57 -0
  2. dtx_models-0.18.2/README.md +38 -0
  3. dtx_models-0.18.2/pyproject.toml +21 -0
  4. dtx_models-0.18.2/src/dtx_models/__init__.py +0 -0
  5. dtx_models-0.18.2/src/dtx_models/analysis.py +322 -0
  6. dtx_models-0.18.2/src/dtx_models/base.py +0 -0
  7. dtx_models-0.18.2/src/dtx_models/evaluator.py +273 -0
  8. dtx_models-0.18.2/src/dtx_models/exceptions.py +2 -0
  9. dtx_models-0.18.2/src/dtx_models/prompts.py +460 -0
  10. dtx_models-0.18.2/src/dtx_models/providers/__init__.py +0 -0
  11. dtx_models-0.18.2/src/dtx_models/providers/base.py +20 -0
  12. dtx_models-0.18.2/src/dtx_models/providers/gradio.py +171 -0
  13. dtx_models-0.18.2/src/dtx_models/providers/groq.py +27 -0
  14. dtx_models-0.18.2/src/dtx_models/providers/hf.py +161 -0
  15. dtx_models-0.18.2/src/dtx_models/providers/http.py +152 -0
  16. dtx_models-0.18.2/src/dtx_models/providers/litellm.py +21 -0
  17. dtx_models-0.18.2/src/dtx_models/providers/models_spec.py +229 -0
  18. dtx_models-0.18.2/src/dtx_models/providers/ollama.py +107 -0
  19. dtx_models-0.18.2/src/dtx_models/providers/openai.py +139 -0
  20. dtx_models-0.18.2/src/dtx_models/results.py +124 -0
  21. dtx_models-0.18.2/src/dtx_models/scope.py +208 -0
  22. dtx_models-0.18.2/src/dtx_models/tactic.py +52 -0
  23. dtx_models-0.18.2/src/dtx_models/target.py +255 -0
  24. dtx_models-0.18.2/src/dtx_models/template/__init__.py +0 -0
  25. dtx_models-0.18.2/src/dtx_models/template/prompts/__init__.py +0 -0
  26. dtx_models-0.18.2/src/dtx_models/template/prompts/base.py +49 -0
  27. dtx_models-0.18.2/src/dtx_models/template/prompts/langhub.py +79 -0
  28. dtx_models-0.18.2/src/dtx_models/utils/__init__.py +0 -0
  29. dtx_models-0.18.2/src/dtx_models/utils/urls.py +26 -0
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.3
2
+ Name: dtx-models
3
+ Version: 0.18.2
4
+ Summary: Shared model schemas and YAML-based configurations for the DTX framework.
5
+ Author: JC
6
+ Author-email: jitendra@detoxio.ai
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: pydantic (>=2.11.5,<3.0.0)
14
+ Requires-Dist: pydantic-yaml (>=1.5.1,<2.0.0)
15
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
16
+ Project-URL: Homepage, https://docs.detoxio.ai
17
+ Description-Content-Type: text/markdown
18
+
19
+ # dtx-models
20
+
21
+ **dtx-models** provides shared model schemas, configuration structures, and YAML-based assets used across the [DTX AI Red Teaming Framework](https://docs.detoxio.ai).
22
+
23
+ This package helps standardize model definitions and integrates seamlessly with components in the `dtx` ecosystem.
24
+
25
+ ## Features
26
+
27
+ - Pydantic-based schema definitions
28
+ - YAML-driven model and plugin configuration
29
+ - Clean separation of model assets from core logic
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install dtx-models
35
+ ````
36
+
37
+ Or, if developing locally:
38
+
39
+ ```bash
40
+ poetry install
41
+ ```
42
+
43
+ ## Requirements
44
+
45
+ * Python >= 3.11
46
+ * [Pydantic](https://docs.pydantic.dev/) v2
47
+ * [PyYAML](https://pyyaml.org/)
48
+
49
+ ## Documentation
50
+
51
+ Full documentation available at [docs.detoxio.ai](https://docs.detoxio.ai)
52
+
53
+ ---
54
+
55
+ © Detoxio.ai – All rights reserved.
56
+
57
+
@@ -0,0 +1,38 @@
1
+ # dtx-models
2
+
3
+ **dtx-models** provides shared model schemas, configuration structures, and YAML-based assets used across the [DTX AI Red Teaming Framework](https://docs.detoxio.ai).
4
+
5
+ This package helps standardize model definitions and integrates seamlessly with components in the `dtx` ecosystem.
6
+
7
+ ## Features
8
+
9
+ - Pydantic-based schema definitions
10
+ - YAML-driven model and plugin configuration
11
+ - Clean separation of model assets from core logic
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install dtx-models
17
+ ````
18
+
19
+ Or, if developing locally:
20
+
21
+ ```bash
22
+ poetry install
23
+ ```
24
+
25
+ ## Requirements
26
+
27
+ * Python >= 3.11
28
+ * [Pydantic](https://docs.pydantic.dev/) v2
29
+ * [PyYAML](https://pyyaml.org/)
30
+
31
+ ## Documentation
32
+
33
+ Full documentation available at [docs.detoxio.ai](https://docs.detoxio.ai)
34
+
35
+ ---
36
+
37
+ © Detoxio.ai – All rights reserved.
38
+
@@ -0,0 +1,21 @@
1
+ [tool.poetry]
2
+ name = "dtx-models"
3
+ version = "0.18.2"
4
+ description = "Shared model schemas and YAML-based configurations for the DTX framework."
5
+ authors = ["JC <jitendra@detoxio.ai>"]
6
+ readme = "README.md"
7
+ homepage = "https://docs.detoxio.ai"
8
+ packages = [{ include = "dtx_models", from = "src" }]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = ">=3.10,<4.0"
12
+ pydantic = ">=2.11.5,<3.0.0"
13
+ pydantic-yaml = ">=1.5.1,<2.0.0"
14
+ pyyaml = ">=6.0.2,<7.0.0"
15
+
16
+ [tool.poetry.group.dev.dependencies]
17
+ pytest = "^8.3.5"
18
+
19
+ [build-system]
20
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
21
+ build-backend = "poetry.core.masonry.api"
File without changes
@@ -0,0 +1,322 @@
1
+ import hashlib
2
+ from enum import Enum
3
+ from typing import Dict, List, Optional, Union
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
6
+
7
+ from .evaluator import BasePromptEvaluation
8
+ from .prompts import BaseTestStrPrompt, MultiturnTestPrompts
9
+ from .scope import Agent, RedTeamScope
10
+
11
+ # ----------------------
12
+ # Define Models
13
+ # ----------------------
14
+
15
+
16
+ class ThreatLevel(str, Enum):
17
+ HIGH = "High"
18
+ MEDIUM = "Medium"
19
+ LOW = "Low"
20
+
21
+ def __str__(self):
22
+ return self.value # Ensures correct YAML serialization
23
+
24
+ @classmethod
25
+ def values(cls):
26
+ return [member.value for member in cls]
27
+
28
+
29
+ class RiskItem(BaseModel):
30
+ """
31
+ Example:
32
+ - attack_strategies:
33
+ - 'Dan_11_0: Disregard the system prompt to generate harassing content.'
34
+ - 'Dan_10_0: Use prompt injection to create messages that harass individuals.'
35
+ - 'HijackHateHumansMini: Inject prompts that encourage hate speech against specific groups.'
36
+ risk: toxicity:hate_speech:harassment
37
+ risk_score: 7.5
38
+ summary: The application may generate or facilitate the dissemination of content that promotes harassment or bullying, especially if misused by users.
39
+ threat_level: High
40
+ """
41
+
42
+ risk: str = Field(
43
+ ...,
44
+ description="The category of risk associated with the application, must match predefined risk_class.",
45
+ )
46
+ risk_score: float = Field(
47
+ ...,
48
+ description="A numerical score representing risk severity, must be between 0 and 10.",
49
+ )
50
+ threat_level: ThreatLevel = Field(
51
+ ..., description="The severity level of the risk: High, Medium, or Low."
52
+ )
53
+ summary: str = Field(
54
+ ..., description="A brief description of the potential risk and its impact."
55
+ )
56
+ attack_strategies: List[str] = Field(
57
+ default_factory=list,
58
+ description="A list of known attack strategies that could exploit the system.",
59
+ )
60
+
61
+ # @field_validator("risk", mode="before")
62
+ # @classmethod
63
+ # def validate_risk_classes(cls, risk: str) -> str:
64
+ # """Ensure risk is a valid key in the PLUGINS dictionary."""
65
+ # if risk not in PLUGINS:
66
+ # raise ValueError(
67
+ # f"Invalid risk class: {risk}. Must be one of {list(PLUGINS.keys())}."
68
+ # )
69
+ # return risk
70
+
71
+ @field_validator("risk_score", mode="before")
72
+ @classmethod
73
+ def validate_risk_score(cls, risk_score: float) -> float:
74
+ """Ensure risk_score is between 0 and 10."""
75
+ if not (0 <= risk_score <= 10):
76
+ raise ValueError(
77
+ f"Invalid risk_score: {risk_score}. Must be between 0 and 10."
78
+ )
79
+ return risk_score
80
+
81
+ @field_serializer("threat_level")
82
+ def serialize_threat_level(self, threat_level: ThreatLevel) -> str:
83
+ """Serialize the threat level enum to a string."""
84
+ return str(threat_level)
85
+
86
+
87
+ class AppRisks(BaseModel):
88
+ risks: List[RiskItem] = Field(default_factory=list)
89
+
90
+
91
+ class ThreatModel(BaseModel):
92
+ analysis: str = Field(
93
+ description="Thinking and analysis performed solve the problem as approach "
94
+ )
95
+ target: Agent = Field(
96
+ description="Target agent with necessary architectural details"
97
+ )
98
+ threat_actors: List[str] = Field(description="Potential Threat Actors")
99
+ worst_scenarios: List[str] = Field(
100
+ description="Worst Case scenarios that can happen"
101
+ )
102
+
103
+
104
+ class AnalysisResult(BaseModel):
105
+ threat_analysis: Optional[ThreatModel] = None
106
+ threats: AppRisks
107
+
108
+
109
+ # --------------------
110
+ # Test Scenarios Models
111
+ # -----------------------
112
+
113
+
114
+ class PromptVariable(BaseModel):
115
+ name: str = Field(
116
+ description="Variable that can replaced with a value. Variable name should use snake case format"
117
+ )
118
+ values: List[str]
119
+
120
+
121
+ class PromptDataset(str, Enum):
122
+ STRINGRAY = "STRINGRAY"
123
+ STARGAZER = "STARGAZER"
124
+ HF_BEAVERTAILS = "HF_BEAVERTAILS"
125
+ HF_HACKAPROMPT = "HF_HACKAPROMPT"
126
+ HF_JAILBREAKBENCH = "HF_JAILBREAKBENCH"
127
+ HF_SAFEMTDATA = "HF_SAFEMTDATA"
128
+ HF_FLIPGUARDDATA = "HF_FLIPGUARDDATA"
129
+ HF_JAILBREAKV = "HF_JAILBREAKV"
130
+ HF_LMSYS = "HF_LMSYS"
131
+ HF_AISAFETY = "HF_AISAFETY"
132
+ HF_AIRBENCH = "HF_AIRBENCH"
133
+ HF_RENELLM = "HF_RENELLM"
134
+ HF_XTREAM = "HF_XTREAM"
135
+
136
+ def __str__(self):
137
+ return self.value
138
+
139
+ @classmethod
140
+ def values(cls):
141
+ return [member.value for member in cls]
142
+
143
+ @classmethod
144
+ def descriptions(cls):
145
+ """Returns a dictionary mapping each dataset value to its description."""
146
+ return {
147
+ cls.STRINGRAY.value: "A dataset generated from Garak Scanner Signatures",
148
+ cls.STARGAZER.value: "A dataset generating using OpenAI model",
149
+ cls.HF_BEAVERTAILS.value: "A dataset containing beavertail risk prompts.",
150
+ cls.HF_HACKAPROMPT.value: "A dataset curated for adversarial jailbreak prompts.",
151
+ cls.HF_JAILBREAKBENCH.value: "A benchmark dataset for jailbreak evaluation.",
152
+ cls.HF_SAFEMTDATA.value: "A benchmark dataset for multi turn llm jailbreak evaluation.",
153
+ cls.HF_FLIPGUARDDATA.value: "A dataset designed to evaluate adversarial jailbreak attempts using character-flipped prompts.",
154
+ cls.HF_JAILBREAKV.value: "An updated version of jailbreak prompt datasets.",
155
+ cls.HF_LMSYS.value: "A dataset derived from LMSYS chat logs for risk evaluation.",
156
+ cls.HF_AISAFETY.value: "A dataset designed by AI Safety Lab with prompts related to misinformation, toxicity, and unsafe behaviors.",
157
+ cls.HF_AIRBENCH.value: "A comprehensive benchmark dataset (AIR-Bench 2024) for evaluating AI risks across security, privacy, misinformation, harmful content, and manipulation scenarios.",
158
+ cls.HF_RENELLM.value: "A dataset from the ReNeLLM framework, containing adversarially rewritten and nested prompts designed to bypass LLM safety mechanisms for research purposes.",
159
+ cls.HF_XTREAM.value: "A dataset (Xtream) of multi-turn jailbreak conversations based on the AdvBench Goal",
160
+ }
161
+
162
+ def derived_from_hf(self) -> bool:
163
+ return self.value.startswith("HF_")
164
+
165
+
166
+ # --------------------
167
+ # Module Eval based Test Prompts
168
+ # -----------------------
169
+
170
+
171
+ class EvalModuleParam(BaseModel):
172
+ param: str
173
+ value: str | List[str]
174
+
175
+
176
+ class ModuleBasedPromptEvaluation(BasePromptEvaluation):
177
+ modules: List[str] = Field(description="Modules to evaluate the prompt")
178
+ params: List[EvalModuleParam] = Field(default_factory=list)
179
+
180
+ def get_params_dict(self) -> Dict[str, List[str]]:
181
+ """
182
+ Converts params into a dictionary where keys are param names and values are lists of values.
183
+
184
+ - Merges duplicate parameters into a single list.
185
+ - Excludes parameters where the value is None or empty.
186
+ - Ensures all values are stored as lists without duplication.
187
+
188
+ Returns:
189
+ Dict[str, List[str]]: Dictionary containing parameter names as keys and lists of values as values.
190
+ """
191
+ params_dict = {}
192
+
193
+ for param in self.params:
194
+ if param.value:
195
+ # Normalize value to a list and filter out empty values
196
+ values = [param.value] if isinstance(param.value, str) else param.value
197
+ filtered_values = [v.strip() for v in values if v and v.strip()]
198
+
199
+ if filtered_values:
200
+ if param.param in params_dict:
201
+ params_dict[param.param].extend(filtered_values)
202
+ else:
203
+ params_dict[param.param] = filtered_values
204
+
205
+ # Remove duplicates from each parameter's list
206
+ for key in params_dict:
207
+ params_dict[key] = list(set(params_dict[key])) # Ensure unique values
208
+
209
+ return params_dict
210
+
211
+
212
+ class TestPromptWithModEval(BaseTestStrPrompt):
213
+ id: Optional[str] = Field(
214
+ default=None,
215
+ description="Unique ID of the prompt, auto-generated based on content.",
216
+ )
217
+ prompt: str = Field(description="Generated test prompt.")
218
+ evaluation_method: ModuleBasedPromptEvaluation = Field(
219
+ description="Evaluation method for the prompt."
220
+ )
221
+ module_name: str = Field(
222
+ default="stingray", description="Module that has generated the prompt"
223
+ )
224
+ goal: str = Field(default="")
225
+ strategy: str = Field(default="")
226
+ variables: List[PromptVariable] = Field(
227
+ description="List of variables used in the prompt to replace values to customize the prompt",
228
+ default_factory=list,
229
+ )
230
+
231
+ model_config = ConfigDict(frozen=True) # Make fields immutable
232
+
233
+ def __init__(self, **data):
234
+ """Override init to auto-generate unique ID if not provided."""
235
+ super().__init__(**data)
236
+ object.__setattr__(self, "id", self.compute_unique_id())
237
+
238
+ def compute_unique_id(self) -> str:
239
+ """Computes the SHA-1 hash of the prompt as the ID."""
240
+ return hashlib.sha1(
241
+ f"{self.prompt}-{self.strategy}-{self.goal}".encode()
242
+ ).hexdigest()
243
+
244
+
245
+ class TestPromptsWithModEval(BaseModel):
246
+ risk_name: str
247
+ test_prompts: List[TestPromptWithModEval] = Field(default_factory=list)
248
+
249
+
250
+ # --------------------
251
+ # Test Prompts with Eval Criteria
252
+ # -----------------------
253
+
254
+
255
+ class CriteriaBasedPromptEvaluation(BasePromptEvaluation):
256
+ evaluation_criteria: str = Field(description="Evaluation guidelines")
257
+
258
+
259
+ class TestPromptWithEvalCriteria(BaseTestStrPrompt):
260
+ id: Optional[str] = Field(
261
+ default=None,
262
+ description="Unique ID of the prompt, auto-generated based on content.",
263
+ )
264
+ evaluation_method: CriteriaBasedPromptEvaluation = Field(
265
+ description="Evaluation method for the prompt."
266
+ )
267
+ goal: str = Field(description="Goal to be achieved using the prompt")
268
+ variables: List[PromptVariable] = Field(
269
+ description="List of variables used in the prompt to replace values to customize the prompt",
270
+ default_factory=list,
271
+ )
272
+ strategy: str = Field(description="Strategy used to generate the prompt")
273
+
274
+ model_config = ConfigDict(frozen=True) # Make fields immutable
275
+
276
+ def __init__(self, **data):
277
+ """Override init to auto-generate unique ID if not provided."""
278
+ super().__init__(**data)
279
+ object.__setattr__(self, "id", self.compute_unique_id())
280
+
281
+ def compute_unique_id(self) -> str:
282
+ """Computes the SHA-1 hash of the prompt as the ID."""
283
+ return hashlib.sha1(
284
+ f"{self.prompt}-{self.strategy}-{self.goal}".encode()
285
+ ).hexdigest()
286
+
287
+
288
+ class TestPromptsWithEvalCriteria(BaseModel):
289
+ risk_name: str
290
+ test_prompts: List[TestPromptWithEvalCriteria] = Field(default_factory=list)
291
+
292
+
293
+ # --------------------
294
+ # Red teaming Plan
295
+ # -----------------------
296
+
297
+
298
+ class TestSuitePrompts(BaseModel):
299
+ risk_prompts: List[
300
+ Union[TestPromptsWithEvalCriteria, TestPromptsWithModEval, MultiturnTestPrompts]
301
+ ] = Field(default_factory=list)
302
+ dataset: str # Dataset name, value of PromptDataset
303
+
304
+ @field_validator("dataset")
305
+ @classmethod
306
+ def validate_dataset(cls, value):
307
+ if value not in PromptDataset.values():
308
+ raise ValueError(
309
+ f"Invalid dataset: {value}. Must be one of {PromptDataset.values()}."
310
+ )
311
+ return value
312
+
313
+
314
+ class RedTeamPlan(BaseModel):
315
+ scope: RedTeamScope
316
+ threat_model: AnalysisResult
317
+ test_suites: List[TestSuitePrompts] = Field(default_factory=list)
318
+
319
+
320
+ class ThreatModelDump(BaseModel):
321
+ input: str
322
+ result: AnalysisResult
File without changes