deeprails 1.6.1.tar.gz → 1.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deeprails might be problematic.

Files changed (85)
  1. deeprails-1.7.0/.release-please-manifest.json +3 -0
  2. {deeprails-1.6.1 → deeprails-1.7.0}/CHANGELOG.md +8 -0
  3. {deeprails-1.6.1 → deeprails-1.7.0}/PKG-INFO +1 -33
  4. {deeprails-1.6.1 → deeprails-1.7.0}/README.md +0 -32
  5. {deeprails-1.6.1 → deeprails-1.7.0}/pyproject.toml +1 -1
  6. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_version.py +1 -1
  7. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/defend.py +43 -43
  8. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/evaluate.py +2 -2
  9. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/monitor.py +2 -2
  10. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_create_workflow_params.py +19 -19
  11. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_response.py +8 -8
  12. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_submit_event_params.py +2 -2
  13. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/evaluate_create_params.py +2 -2
  14. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/evaluation.py +2 -2
  15. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_submit_event_params.py +1 -1
  16. {deeprails-1.6.1 → deeprails-1.7.0}/tests/api_resources/test_defend.py +14 -20
  17. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_client.py +10 -26
  18. deeprails-1.6.1/.release-please-manifest.json +0 -3
  19. {deeprails-1.6.1 → deeprails-1.7.0}/.gitignore +0 -0
  20. {deeprails-1.6.1 → deeprails-1.7.0}/CONTRIBUTING.md +0 -0
  21. {deeprails-1.6.1 → deeprails-1.7.0}/LICENSE +0 -0
  22. {deeprails-1.6.1 → deeprails-1.7.0}/api.md +0 -0
  23. {deeprails-1.6.1 → deeprails-1.7.0}/bin/check-release-environment +0 -0
  24. {deeprails-1.6.1 → deeprails-1.7.0}/bin/publish-pypi +0 -0
  25. {deeprails-1.6.1 → deeprails-1.7.0}/examples/.keep +0 -0
  26. {deeprails-1.6.1 → deeprails-1.7.0}/noxfile.py +0 -0
  27. {deeprails-1.6.1 → deeprails-1.7.0}/release-please-config.json +0 -0
  28. {deeprails-1.6.1 → deeprails-1.7.0}/requirements-dev.lock +0 -0
  29. {deeprails-1.6.1 → deeprails-1.7.0}/requirements.lock +0 -0
  30. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/__init__.py +0 -0
  31. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_base_client.py +0 -0
  32. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_client.py +0 -0
  33. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_compat.py +0 -0
  34. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_constants.py +0 -0
  35. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_exceptions.py +0 -0
  36. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_files.py +0 -0
  37. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_models.py +0 -0
  38. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_qs.py +0 -0
  39. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_resource.py +0 -0
  40. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_response.py +0 -0
  41. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_streaming.py +0 -0
  42. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_types.py +0 -0
  43. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/__init__.py +0 -0
  44. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_compat.py +0 -0
  45. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_datetime_parse.py +0 -0
  46. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_logs.py +0 -0
  47. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_proxy.py +0 -0
  48. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_reflection.py +0 -0
  49. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_resources_proxy.py +0 -0
  50. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_streams.py +0 -0
  51. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_sync.py +0 -0
  52. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_transform.py +0 -0
  53. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_typing.py +0 -0
  54. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_utils/_utils.py +0 -0
  55. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/lib/.keep +0 -0
  56. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/py.typed +0 -0
  57. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/__init__.py +0 -0
  58. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/__init__.py +0 -0
  59. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/api_response.py +0 -0
  60. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_update_workflow_params.py +0 -0
  61. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_create_params.py +0 -0
  62. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_retrieve_params.py +0 -0
  63. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_retrieve_response.py +0 -0
  64. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_submit_event_response.py +0 -0
  65. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_update_params.py +0 -0
  66. {deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/workflow_event_response.py +0 -0
  67. {deeprails-1.6.1 → deeprails-1.7.0}/tests/__init__.py +0 -0
  68. {deeprails-1.6.1 → deeprails-1.7.0}/tests/api_resources/__init__.py +0 -0
  69. {deeprails-1.6.1 → deeprails-1.7.0}/tests/api_resources/test_evaluate.py +0 -0
  70. {deeprails-1.6.1 → deeprails-1.7.0}/tests/api_resources/test_monitor.py +0 -0
  71. {deeprails-1.6.1 → deeprails-1.7.0}/tests/conftest.py +0 -0
  72. {deeprails-1.6.1 → deeprails-1.7.0}/tests/sample_file.txt +0 -0
  73. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_deepcopy.py +0 -0
  74. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_extract_files.py +0 -0
  75. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_files.py +0 -0
  76. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_models.py +0 -0
  77. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_qs.py +0 -0
  78. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_required_args.py +0 -0
  79. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_response.py +0 -0
  80. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_streaming.py +0 -0
  81. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_transform.py +0 -0
  82. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_utils/test_datetime_parse.py +0 -0
  83. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_utils/test_proxy.py +0 -0
  84. {deeprails-1.6.1 → deeprails-1.7.0}/tests/test_utils/test_typing.py +0 -0
  85. {deeprails-1.6.1 → deeprails-1.7.0}/tests/utils.py +0 -0
deeprails-1.7.0/.release-please-manifest.json
@@ -0,0 +1,3 @@
+ {
+   ".": "1.7.0"
+ }
{deeprails-1.6.1 → deeprails-1.7.0}/CHANGELOG.md
@@ -1,5 +1,13 @@
  # Changelog

+ ## 1.7.0 (2025-10-22)
+
+ Full Changelog: [v1.6.1...v1.7.0](https://github.com/deeprails/deeprails-sdk-python/compare/v1.6.1...v1.7.0)
+
+ ### Features
+
+ * **api:** update defend naming and data structures ([ba934de](https://github.com/deeprails/deeprails-sdk-python/commit/ba934de8812d71da159a00fa8283876e682aeb7b))
+
  ## 1.6.1 (2025-10-22)

  Full Changelog: [v1.6.0...v1.6.1](https://github.com/deeprails/deeprails-sdk-python/compare/v1.6.0...v1.6.1)
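For context, the feature commit above reworks the `defend.create_workflow` parameters that the rest of this diff touches. A minimal before/after sketch, based only on the parameter names and literals visible in this diff (the metric names and threshold values are illustrative):

```python
from deeprails import Deeprails

client = Deeprails()  # constructed as in the README examples later in this diff

# deeprails 1.6.1: thresholds passed via `metrics`, retry cap via `max_retries`
# client.defend.create_workflow(
#     improvement_action="regenerate",
#     metrics={"completeness": 0.7, "instruction_adherence": 0.75},
#     max_retries=10,
#     name="Push Alert Workflow",
#     type="custom",
# )

# deeprails 1.7.0: per-metric thresholds or tolerance levels, renamed retry cap
workflow = client.defend.create_workflow(
    improvement_action="fixit",  # now one of "regen", "fixit", "do_nothing"
    custom_hallucination_threshold_values={
        "completeness": 0.7,
        "instruction_adherence": 0.75,
    },
    max_improvement_attempt=10,
    name="Push Alert Workflow",
    type="custom",
)
```

The test changes further down exercise the same renames (`automatic_tolerance` → `automatic_hallucination_tolerance_levels`, `max_retries` → `max_improvement_attempt`).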
{deeprails-1.6.1 → deeprails-1.7.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: deeprails
- Version: 1.6.1
+ Version: 1.7.0
  Summary: The official Python library for the deeprails API
  Project-URL: Homepage, https://docs.deeprails.com/
  Project-URL: Repository, https://github.com/deeprails/deeprails-sdk-python
@@ -67,10 +67,6 @@ client = Deeprails(

  defend_response = client.defend.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -99,10 +95,6 @@ client = AsyncDeeprails(
  async def main() -> None:
      defend_response = await client.defend.create_workflow(
          improvement_action="fixit",
-         metrics={
-             "completeness": 0.7,
-             "instruction_adherence": 0.75,
-         },
          name="Push Alert Workflow",
          type="custom",
      )
@@ -140,10 +132,6 @@ async def main() -> None:
      ) as client:
          defend_response = await client.defend.create_workflow(
              improvement_action="fixit",
-             metrics={
-                 "completeness": 0.7,
-                 "instruction_adherence": 0.75,
-             },
              name="Push Alert Workflow",
              type="custom",
          )
@@ -199,10 +187,6 @@ client = Deeprails()
  try:
      client.defend.create_workflow(
          improvement_action="fixit",
-         metrics={
-             "completeness": 0.7,
-             "instruction_adherence": 0.75,
-         },
          name="Push Alert Workflow",
          type="custom",
      )
@@ -250,10 +234,6 @@ client = Deeprails(
  # Or, configure per-request:
  client.with_options(max_retries=5).defend.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -281,10 +261,6 @@ client = Deeprails(
  # Override per-request:
  client.with_options(timeout=5.0).defend.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -330,10 +306,6 @@ from deeprails import Deeprails
  client = Deeprails()
  response = client.defend.with_raw_response.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -356,10 +328,6 @@ To stream the response body, use `.with_streaming_response` instead, which requi
  ```python
  with client.defend.with_streaming_response.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  ) as response:
{deeprails-1.6.1 → deeprails-1.7.0}/README.md
@@ -32,10 +32,6 @@ client = Deeprails(

  defend_response = client.defend.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -64,10 +60,6 @@ client = AsyncDeeprails(
  async def main() -> None:
      defend_response = await client.defend.create_workflow(
          improvement_action="fixit",
-         metrics={
-             "completeness": 0.7,
-             "instruction_adherence": 0.75,
-         },
          name="Push Alert Workflow",
          type="custom",
      )
@@ -105,10 +97,6 @@ async def main() -> None:
      ) as client:
          defend_response = await client.defend.create_workflow(
              improvement_action="fixit",
-             metrics={
-                 "completeness": 0.7,
-                 "instruction_adherence": 0.75,
-             },
              name="Push Alert Workflow",
              type="custom",
          )
@@ -164,10 +152,6 @@ client = Deeprails()
  try:
      client.defend.create_workflow(
          improvement_action="fixit",
-         metrics={
-             "completeness": 0.7,
-             "instruction_adherence": 0.75,
-         },
          name="Push Alert Workflow",
          type="custom",
      )
@@ -215,10 +199,6 @@ client = Deeprails(
  # Or, configure per-request:
  client.with_options(max_retries=5).defend.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -246,10 +226,6 @@ client = Deeprails(
  # Override per-request:
  client.with_options(timeout=5.0).defend.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -295,10 +271,6 @@ from deeprails import Deeprails
  client = Deeprails()
  response = client.defend.with_raw_response.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  )
@@ -321,10 +293,6 @@ To stream the response body, use `.with_streaming_response` instead, which requi
  ```python
  with client.defend.with_streaming_response.create_workflow(
      improvement_action="fixit",
-     metrics={
-         "completeness": 0.7,
-         "instruction_adherence": 0.75,
-     },
      name="Push Alert Workflow",
      type="custom",
  ) as response:
{deeprails-1.6.1 → deeprails-1.7.0}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "deeprails"
- version = "1.6.1"
+ version = "1.7.0"
  description = "The official Python library for the deeprails API"
  dynamic = ["readme"]
  license = "Apache-2.0"
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/_version.py
@@ -1,4 +1,4 @@
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

  __title__ = "deeprails"
- __version__ = "1.6.1" # x-release-please-version
+ __version__ = "1.7.0" # x-release-please-version
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/defend.py
@@ -2,7 +2,7 @@

  from __future__ import annotations

- from typing import Dict, Optional
+ from typing import Dict
  from typing_extensions import Literal

  import httpx
@@ -48,13 +48,13 @@ class DefendResource(SyncAPIResource):
      def create_workflow(
          self,
          *,
-         improvement_action: Optional[Literal["regenerate", "fixit"]],
-         metrics: Dict[str, float],
+         improvement_action: Literal["regen", "fixit", "do_nothing"],
          name: str,
          type: Literal["automatic", "custom"],
-         automatic_tolerance: Literal["low", "medium", "high"] | Omit = omit,
+         automatic_hallucination_tolerance_levels: Dict[str, Literal["low", "medium", "high"]] | Omit = omit,
+         custom_hallucination_threshold_values: Dict[str, float] | Omit = omit,
          description: str | Omit = omit,
-         max_retries: int | Omit = omit,
+         max_improvement_attempt: int | Omit = omit,
          # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
          # The extra values given here take precedence over values defined on the client or passed to this method.
          extra_headers: Headers | None = None,
@@ -68,16 +68,10 @@ class DefendResource(SyncAPIResource):

          Args:
            improvement_action: The action used to improve outputs that fail one or guardrail metrics for the
-               workflow events. May be `regenerate`, `fixit`, or null which represents “do
-               nothing”. Regenerate runs the user's input prompt with minor induced variance.
-               Fixit attempts to directly address the shortcomings of the output using the
-               guardrail failure rationale. Do nothing does not attempt any improvement.
-
-           metrics: Mapping of guardrail metrics to floating point threshold values. If the workflow
-               type is automatic, only the metric names are used (`automatic_tolerance`
-               determines thresholds). Possible metrics are `correctness`, `completeness`,
-               `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
-               `comprehensive_safety`.
+               workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
+               input prompt with minor induced variance. FixIt attempts to directly address the
+               shortcomings of the output using the guardrail failure rationale. Do Nothing
+               does not attempt any improvement.

            name: Name of the workflow.

@@ -87,12 +81,18 @@ class DefendResource(SyncAPIResource):
                set the threshold for each metric as a floating point number between 0.0 and
                1.0.

-           automatic_tolerance: Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
-               `high`. Ignored if `type` is `custom`.
+           automatic_hallucination_tolerance_levels: Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
+               `medium`, or `high`). Possible metrics are `completeness`,
+               `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
+               `comprehensive_safety`.
+
+           custom_hallucination_threshold_values: Mapping of guardrail metrics to floating point threshold values. Possible
+               metrics are `correctness`, `completeness`, `instruction_adherence`,
+               `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.

            description: Description for the workflow.

-           max_retries: Max. number of improvement action retries until a given event passes the
+           max_improvement_attempt: Max. number of improvement action retries until a given event passes the
                guardrails. Defaults to 10.

            extra_headers: Send extra headers
@@ -108,12 +108,12 @@ class DefendResource(SyncAPIResource):
              body=maybe_transform(
                  {
                      "improvement_action": improvement_action,
-                     "metrics": metrics,
                      "name": name,
                      "type": type,
-                     "automatic_tolerance": automatic_tolerance,
+                     "automatic_hallucination_tolerance_levels": automatic_hallucination_tolerance_levels,
+                     "custom_hallucination_threshold_values": custom_hallucination_threshold_values,
                      "description": description,
-                     "max_retries": max_retries,
+                     "max_improvement_attempt": max_improvement_attempt,
                  },
                  defend_create_workflow_params.DefendCreateWorkflowParams,
              ),
@@ -214,7 +214,7 @@ class DefendResource(SyncAPIResource):

          Args:
            model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-               contain at least one of `user_prompt` or `system_prompt`. For
+               contain at least `user_prompt` or `system_prompt` field. For
                ground_truth_aherence guadrail metric, `ground_truth` should be provided.

            model_output: Output generated by the LLM to be evaluated.
@@ -326,13 +326,13 @@ class AsyncDefendResource(AsyncAPIResource):
      async def create_workflow(
          self,
          *,
-         improvement_action: Optional[Literal["regenerate", "fixit"]],
-         metrics: Dict[str, float],
+         improvement_action: Literal["regen", "fixit", "do_nothing"],
          name: str,
          type: Literal["automatic", "custom"],
-         automatic_tolerance: Literal["low", "medium", "high"] | Omit = omit,
+         automatic_hallucination_tolerance_levels: Dict[str, Literal["low", "medium", "high"]] | Omit = omit,
+         custom_hallucination_threshold_values: Dict[str, float] | Omit = omit,
          description: str | Omit = omit,
-         max_retries: int | Omit = omit,
+         max_improvement_attempt: int | Omit = omit,
          # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
          # The extra values given here take precedence over values defined on the client or passed to this method.
          extra_headers: Headers | None = None,
@@ -346,16 +346,10 @@ class AsyncDefendResource(AsyncAPIResource):

          Args:
            improvement_action: The action used to improve outputs that fail one or guardrail metrics for the
-               workflow events. May be `regenerate`, `fixit`, or null which represents “do
-               nothing”. Regenerate runs the user's input prompt with minor induced variance.
-               Fixit attempts to directly address the shortcomings of the output using the
-               guardrail failure rationale. Do nothing does not attempt any improvement.
-
-           metrics: Mapping of guardrail metrics to floating point threshold values. If the workflow
-               type is automatic, only the metric names are used (`automatic_tolerance`
-               determines thresholds). Possible metrics are `correctness`, `completeness`,
-               `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
-               `comprehensive_safety`.
+               workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
+               input prompt with minor induced variance. FixIt attempts to directly address the
+               shortcomings of the output using the guardrail failure rationale. Do Nothing
+               does not attempt any improvement.

            name: Name of the workflow.

@@ -365,12 +359,18 @@ class AsyncDefendResource(AsyncAPIResource):
                set the threshold for each metric as a floating point number between 0.0 and
                1.0.

-           automatic_tolerance: Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
-               `high`. Ignored if `type` is `custom`.
+           automatic_hallucination_tolerance_levels: Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
+               `medium`, or `high`). Possible metrics are `completeness`,
+               `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
+               `comprehensive_safety`.
+
+           custom_hallucination_threshold_values: Mapping of guardrail metrics to floating point threshold values. Possible
+               metrics are `correctness`, `completeness`, `instruction_adherence`,
+               `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.

            description: Description for the workflow.

-           max_retries: Max. number of improvement action retries until a given event passes the
+           max_improvement_attempt: Max. number of improvement action retries until a given event passes the
                guardrails. Defaults to 10.

            extra_headers: Send extra headers
@@ -386,12 +386,12 @@ class AsyncDefendResource(AsyncAPIResource):
              body=await async_maybe_transform(
                  {
                      "improvement_action": improvement_action,
-                     "metrics": metrics,
                      "name": name,
                      "type": type,
-                     "automatic_tolerance": automatic_tolerance,
+                     "automatic_hallucination_tolerance_levels": automatic_hallucination_tolerance_levels,
+                     "custom_hallucination_threshold_values": custom_hallucination_threshold_values,
                      "description": description,
-                     "max_retries": max_retries,
+                     "max_improvement_attempt": max_improvement_attempt,
                  },
                  defend_create_workflow_params.DefendCreateWorkflowParams,
              ),
@@ -492,7 +492,7 @@ class AsyncDefendResource(AsyncAPIResource):

          Args:
            model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-               contain at least one of `user_prompt` or `system_prompt`. For
+               contain at least `user_prompt` or `system_prompt` field. For
                ground_truth_aherence guadrail metric, `ground_truth` should be provided.

            model_output: Output generated by the LLM to be evaluated.
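The `submit_event` docstring change above is only a wording tweak, but it pins down the `model_input` contract. A minimal sketch of a payload that satisfies it (field names come from the docstrings in this diff; the prompt strings are illustrative):

```python
# model_input needs at least a `user_prompt` or `system_prompt` field;
# `ground_truth` matters only when the ground_truth_adherence metric is evaluated.
model_input = {
    "system_prompt": "You are a release-notes assistant.",        # illustrative
    "user_prompt": "Summarize what changed in deeprails 1.7.0.",  # illustrative
    "ground_truth": "The defend workflow parameters were renamed.",  # optional
}

# the LLM output to be evaluated against the guardrail metrics
model_output = "deeprails 1.7.0 renames the defend workflow parameters."
```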
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/evaluate.py
@@ -76,7 +76,7 @@ class EvaluateResource(SyncAPIResource):

          Args:
            model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-               contain at least one of `user_prompt` or `system_prompt`. For
+               contain at least `user_prompt` or `system_prompt` field. For
                ground_truth_aherence guadrail metric, `ground_truth` should be provided.

            model_output: Output generated by the LLM to be evaluated.
@@ -207,7 +207,7 @@ class AsyncEvaluateResource(AsyncAPIResource):

          Args:
            model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-               contain at least one of `user_prompt` or `system_prompt`. For
+               contain at least `user_prompt` or `system_prompt` field. For
                ground_truth_aherence guadrail metric, `ground_truth` should be provided.

            model_output: Output generated by the LLM to be evaluated.
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/resources/monitor.py
@@ -220,7 +220,7 @@ class MonitorResource(SyncAPIResource):
                `ground_truth_adherence`, and/or `comprehensive_safety`.

            model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-               contain at least one of `user_prompt` or `system_prompt`. For
+               contain at least a `user_prompt` or `system_prompt` field. For
                ground_truth_aherence guadrail metric, `ground_truth` should be provided.

            model_output: Output generated by the LLM to be evaluated.
@@ -458,7 +458,7 @@ class AsyncMonitorResource(AsyncAPIResource):
                `ground_truth_adherence`, and/or `comprehensive_safety`.

            model_input: A dictionary of inputs sent to the LLM to generate output. The dictionary must
-               contain at least one of `user_prompt` or `system_prompt`. For
+               contain at least a `user_prompt` or `system_prompt` field. For
                ground_truth_aherence guadrail metric, `ground_truth` should be provided.

            model_output: Output generated by the LLM to be evaluated.
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_create_workflow_params.py
@@ -2,29 +2,20 @@

  from __future__ import annotations

- from typing import Dict, Optional
+ from typing import Dict
  from typing_extensions import Literal, Required, TypedDict

  __all__ = ["DefendCreateWorkflowParams"]


  class DefendCreateWorkflowParams(TypedDict, total=False):
-     improvement_action: Required[Optional[Literal["regenerate", "fixit"]]]
+     improvement_action: Required[Literal["regen", "fixit", "do_nothing"]]
      """
      The action used to improve outputs that fail one or guardrail metrics for the
-     workflow events. May be `regenerate`, `fixit`, or null which represents “do
-     nothing”. Regenerate runs the user's input prompt with minor induced variance.
-     Fixit attempts to directly address the shortcomings of the output using the
-     guardrail failure rationale. Do nothing does not attempt any improvement.
-     """
-
-     metrics: Required[Dict[str, float]]
-     """Mapping of guardrail metrics to floating point threshold values.
-
-     If the workflow type is automatic, only the metric names are used
-     (`automatic_tolerance` determines thresholds). Possible metrics are
-     `correctness`, `completeness`, `instruction_adherence`, `context_adherence`,
-     `ground_truth_adherence`, or `comprehensive_safety`.
+     workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
+     input prompt with minor induced variance. FixIt attempts to directly address the
+     shortcomings of the output using the guardrail failure rationale. Do Nothing
+     does not attempt any improvement.
      """

      name: Required[str]
@@ -39,16 +30,25 @@ class DefendCreateWorkflowParams(TypedDict, total=False):
      1.0.
      """

-     automatic_tolerance: Literal["low", "medium", "high"]
+     automatic_hallucination_tolerance_levels: Dict[str, Literal["low", "medium", "high"]]
      """
-     Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
-     `high`. Ignored if `type` is `custom`.
+     Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
+     `medium`, or `high`). Possible metrics are `completeness`,
+     `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
+     `comprehensive_safety`.
+     """
+
+     custom_hallucination_threshold_values: Dict[str, float]
+     """Mapping of guardrail metrics to floating point threshold values.
+
+     Possible metrics are `correctness`, `completeness`, `instruction_adherence`,
+     `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
      """

      description: str
      """Description for the workflow."""

-     max_retries: int
+     max_improvement_attempt: int
      """Max.

      number of improvement action retries until a given event passes the guardrails.
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_response.py
@@ -22,16 +22,16 @@ class DefendResponse(BaseModel):
      description: Optional[str] = None
      """Description for the workflow."""

-     improvement_action: Optional[Literal["regenerate", "fixit"]] = None
+     improvement_action: Optional[Literal["regen", "fixit", "do_nothing"]] = None
      """
      The action used to improve outputs that fail one or more guardrail metrics for
-     the workflow events. May be `regenerate`, `fixit`, or null which represents “do
-     nothing”. Regenerate runs the user's input prompt with minor induced variance.
-     Fixit attempts to directly address the shortcomings of the output using the
-     guardrail failure rationale. Do nothing does not attempt any improvement.
+     the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
+     user's input prompt with minor induced variance. FixIt attempts to directly
+     address the shortcomings of the output using the guardrail failure rationale. Do
+     Nothing does not attempt any improvement.
      """

-     max_retries: Optional[int] = None
+     max_improvement_attempt: Optional[int] = None
      """Max.

      number of improvement action retries until a given event passes the guardrails.
@@ -40,10 +40,10 @@ class DefendResponse(BaseModel):
      modified_at: Optional[datetime] = None
      """The most recent time the workflow was modified in UTC."""

-     status: Optional[Literal["archived", "active"]] = None
+     status: Optional[Literal["inactive", "active"]] = None
      """Status of the selected workflow.

-     May be `archived` or `active`. Archived workflows will not accept events.
+     May be `inactive` or `active`. Inactive workflows will not accept events.
      """

      success_rate: Optional[float] = None
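Client code that inspects the returned `DefendResponse` sees the same renames. A small sketch, assuming `workflow` was returned by `client.defend.create_workflow(...)` as in the tests below:

```python
# `workflow` is a DefendResponse (see tests/api_resources/test_defend.py below)
if workflow.status == "inactive":  # 1.6.1 used "archived" for this state
    print("This workflow will not accept events.")

print(workflow.improvement_action)       # "regen", "fixit", or "do_nothing"
print(workflow.max_improvement_attempt)  # renamed from `max_retries`
```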
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/defend_submit_event_params.py
@@ -11,8 +11,8 @@ class DefendSubmitEventParams(TypedDict, total=False):
      model_input: Required[ModelInput]
      """A dictionary of inputs sent to the LLM to generate output.

-     The dictionary must contain at least one of `user_prompt` or `system_prompt`.
-     For ground_truth_aherence guadrail metric, `ground_truth` should be provided.
+     The dictionary must contain at least `user_prompt` or `system_prompt` field. For
+     ground_truth_aherence guadrail metric, `ground_truth` should be provided.
      """

      model_output: Required[str]
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/evaluate_create_params.py
@@ -12,8 +12,8 @@ class EvaluateCreateParams(TypedDict, total=False):
      model_input: Required[ModelInput]
      """A dictionary of inputs sent to the LLM to generate output.

-     The dictionary must contain at least one of `user_prompt` or `system_prompt`.
-     For ground_truth_aherence guadrail metric, `ground_truth` should be provided.
+     The dictionary must contain at least `user_prompt` or `system_prompt` field. For
+     ground_truth_aherence guadrail metric, `ground_truth` should be provided.
      """

      model_output: Required[str]
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/evaluation.py
@@ -32,8 +32,8 @@ class Evaluation(BaseModel):
      api_model_input: ModelInput = FieldInfo(alias="model_input")
      """A dictionary of inputs sent to the LLM to generate output.

-     The dictionary must contain at least one of `user_prompt` or `system_prompt`.
-     For ground_truth_aherence guadrail metric, `ground_truth` should be provided.
+     The dictionary must contain at least `user_prompt` or `system_prompt` field. For
+     ground_truth_aherence guadrail metric, `ground_truth` should be provided.
      """

      api_model_output: str = FieldInfo(alias="model_output")
{deeprails-1.6.1 → deeprails-1.7.0}/src/deeprails/types/monitor_submit_event_params.py
@@ -31,7 +31,7 @@ class MonitorSubmitEventParams(TypedDict, total=False):
      model_input: Required[ModelInput]
      """A dictionary of inputs sent to the LLM to generate output.

-     The dictionary must contain at least one of `user_prompt` or `system_prompt`.
+     The dictionary must contain at least a `user_prompt` or `system_prompt` field.
      For ground_truth_aherence guadrail metric, `ground_truth` should be provided.
      """

{deeprails-1.6.1 → deeprails-1.7.0}/tests/api_resources/test_defend.py
@@ -24,8 +24,7 @@ class TestDefend:
      @parametrize
      def test_method_create_workflow(self, client: Deeprails) -> None:
          defend = client.defend.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
          )
@@ -35,13 +34,13 @@ class TestDefend:
      @parametrize
      def test_method_create_workflow_with_all_params(self, client: Deeprails) -> None:
          defend = client.defend.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
-             automatic_tolerance="low",
+             automatic_hallucination_tolerance_levels={"foo": "low"},
+             custom_hallucination_threshold_values={"foo": 0},
              description="description",
-             max_retries=0,
+             max_improvement_attempt=0,
          )
          assert_matches_type(DefendResponse, defend, path=["response"])

@@ -49,8 +48,7 @@ class TestDefend:
      @parametrize
      def test_raw_response_create_workflow(self, client: Deeprails) -> None:
          response = client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
          )
@@ -64,8 +62,7 @@ class TestDefend:
      @parametrize
      def test_streaming_response_create_workflow(self, client: Deeprails) -> None:
          with client.defend.with_streaming_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
          ) as response:
@@ -308,8 +305,7 @@ class TestAsyncDefend:
      @parametrize
      async def test_method_create_workflow(self, async_client: AsyncDeeprails) -> None:
          defend = await async_client.defend.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
          )
@@ -319,13 +315,13 @@ class TestAsyncDefend:
      @parametrize
      async def test_method_create_workflow_with_all_params(self, async_client: AsyncDeeprails) -> None:
          defend = await async_client.defend.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
-             automatic_tolerance="low",
+             automatic_hallucination_tolerance_levels={"foo": "low"},
+             custom_hallucination_threshold_values={"foo": 0},
              description="description",
-             max_retries=0,
+             max_improvement_attempt=0,
          )
          assert_matches_type(DefendResponse, defend, path=["response"])

@@ -333,8 +329,7 @@ class TestAsyncDefend:
      @parametrize
      async def test_raw_response_create_workflow(self, async_client: AsyncDeeprails) -> None:
          response = await async_client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
          )
@@ -348,8 +343,7 @@ class TestAsyncDefend:
      @parametrize
      async def test_streaming_response_create_workflow(self, async_client: AsyncDeeprails) -> None:
          async with async_client.defend.with_streaming_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
+             improvement_action="regen",
              name="name",
              type="automatic",
          ) as response:
{deeprails-1.6.1 → deeprails-1.7.0}/tests/test_client.py
@@ -718,7 +718,7 @@ class TestDeeprails:

          with pytest.raises(APITimeoutError):
              client.defend.with_streaming_response.create_workflow(
-                 improvement_action="regenerate", metrics={"foo": 0}, name="name", type="automatic"
+                 improvement_action="regen", name="name", type="automatic"
              ).__enter__()

          assert _get_open_connections(self.client) == 0
@@ -730,7 +730,7 @@ class TestDeeprails:

          with pytest.raises(APIStatusError):
              client.defend.with_streaming_response.create_workflow(
-                 improvement_action="regenerate", metrics={"foo": 0}, name="name", type="automatic"
+                 improvement_action="regen", name="name", type="automatic"
              ).__enter__()
          assert _get_open_connections(self.client) == 0

@@ -761,7 +761,7 @@ class TestDeeprails:
          respx_mock.post("/defend").mock(side_effect=retry_handler)

          response = client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate", metrics={"foo": 0}, name="name", type="automatic"
+             improvement_action="regen", name="name", type="automatic"
          )

          assert response.retries_taken == failures_before_success
@@ -787,11 +787,7 @@ class TestDeeprails:
          respx_mock.post("/defend").mock(side_effect=retry_handler)

          response = client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
-             name="name",
-             type="automatic",
-             extra_headers={"x-stainless-retry-count": Omit()},
+             improvement_action="regen", name="name", type="automatic", extra_headers={"x-stainless-retry-count": Omit()}
          )

          assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
@@ -816,11 +812,7 @@ class TestDeeprails:
          respx_mock.post("/defend").mock(side_effect=retry_handler)

          response = client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
-             name="name",
-             type="automatic",
-             extra_headers={"x-stainless-retry-count": "42"},
+             improvement_action="regen", name="name", type="automatic", extra_headers={"x-stainless-retry-count": "42"}
          )

          assert response.http_request.headers.get("x-stainless-retry-count") == "42"
@@ -1553,7 +1545,7 @@ class TestAsyncDeeprails:

          with pytest.raises(APITimeoutError):
              await async_client.defend.with_streaming_response.create_workflow(
-                 improvement_action="regenerate", metrics={"foo": 0}, name="name", type="automatic"
+                 improvement_action="regen", name="name", type="automatic"
              ).__aenter__()

          assert _get_open_connections(self.client) == 0
@@ -1567,7 +1559,7 @@ class TestAsyncDeeprails:

          with pytest.raises(APIStatusError):
              await async_client.defend.with_streaming_response.create_workflow(
-                 improvement_action="regenerate", metrics={"foo": 0}, name="name", type="automatic"
+                 improvement_action="regen", name="name", type="automatic"
              ).__aenter__()
          assert _get_open_connections(self.client) == 0

@@ -1599,7 +1591,7 @@ class TestAsyncDeeprails:
          respx_mock.post("/defend").mock(side_effect=retry_handler)

          response = await client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate", metrics={"foo": 0}, name="name", type="automatic"
+             improvement_action="regen", name="name", type="automatic"
          )

          assert response.retries_taken == failures_before_success
@@ -1626,11 +1618,7 @@ class TestAsyncDeeprails:
          respx_mock.post("/defend").mock(side_effect=retry_handler)

          response = await client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
-             name="name",
-             type="automatic",
-             extra_headers={"x-stainless-retry-count": Omit()},
+             improvement_action="regen", name="name", type="automatic", extra_headers={"x-stainless-retry-count": Omit()}
          )

          assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
@@ -1656,11 +1644,7 @@ class TestAsyncDeeprails:
          respx_mock.post("/defend").mock(side_effect=retry_handler)

          response = await client.defend.with_raw_response.create_workflow(
-             improvement_action="regenerate",
-             metrics={"foo": 0},
-             name="name",
-             type="automatic",
-             extra_headers={"x-stainless-retry-count": "42"},
+             improvement_action="regen", name="name", type="automatic", extra_headers={"x-stainless-retry-count": "42"}
          )

          assert response.http_request.headers.get("x-stainless-retry-count") == "42"
deeprails-1.6.1/.release-please-manifest.json
@@ -1,3 +0,0 @@
- {
-   ".": "1.6.1"
- }