deeprails 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,27 +15,17 @@ module Deeprails
15
15
  end
16
16
 
17
17
  # The action used to improve outputs that fail one or more guardrail metrics for the
18
- # workflow events. May be `regenerate`, `fixit`, or null which represents “do
19
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
20
- # Fixit attempts to directly address the shortcomings of the output using the
21
- # guardrail failure rationale. Do nothing does not attempt any improvement.
18
+ # workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
19
+ # input prompt with minor induced variance. FixIt attempts to directly address the
20
+ # shortcomings of the output using the guardrail failure rationale. Do Nothing
21
+ # does not attempt any improvement.
22
22
  sig do
23
23
  returns(
24
- T.nilable(
25
- Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
26
- )
24
+ Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
27
25
  )
28
26
  end
29
27
  attr_accessor :improvement_action
30
28
 
31
- # Mapping of guardrail metrics to floating point threshold values. If the workflow
32
- # type is automatic, only the metric names are used (`automatic_tolerance`
33
- # determines thresholds). Possible metrics are `correctness`, `completeness`,
34
- # `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
35
- # `comprehensive_safety`.
36
- sig { returns(T::Hash[Symbol, Float]) }
37
- attr_accessor :metrics
38
-
39
29
  # Name of the workflow.
40
30
  sig { returns(String) }
41
31
  attr_accessor :name
@@ -48,24 +38,45 @@ module Deeprails
48
38
  sig { returns(Deeprails::DefendCreateWorkflowParams::Type::OrSymbol) }
49
39
  attr_accessor :type
50
40
 
51
- # Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
52
- # `high`. Ignored if `type` is `custom`.
41
+ # Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
42
+ # `medium`, or `high`). Possible metrics are `completeness`,
43
+ # `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
44
+ # `comprehensive_safety`.
53
45
  sig do
54
46
  returns(
55
47
  T.nilable(
56
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::OrSymbol
48
+ T::Hash[
49
+ Symbol,
50
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
51
+ ]
57
52
  )
58
53
  )
59
54
  end
60
- attr_reader :automatic_tolerance
55
+ attr_reader :automatic_hallucination_tolerance_levels
61
56
 
62
57
  sig do
63
58
  params(
64
- automatic_tolerance:
65
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::OrSymbol
59
+ automatic_hallucination_tolerance_levels:
60
+ T::Hash[
61
+ Symbol,
62
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
63
+ ]
66
64
  ).void
67
65
  end
68
- attr_writer :automatic_tolerance
66
+ attr_writer :automatic_hallucination_tolerance_levels
67
+
68
+ # Mapping of guardrail metrics to floating point threshold values. Possible
69
+ # metrics are `correctness`, `completeness`, `instruction_adherence`,
70
+ # `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
71
+ sig { returns(T.nilable(T::Hash[Symbol, Float])) }
72
+ attr_reader :custom_hallucination_threshold_values
73
+
74
+ sig do
75
+ params(
76
+ custom_hallucination_threshold_values: T::Hash[Symbol, Float]
77
+ ).void
78
+ end
79
+ attr_writer :custom_hallucination_threshold_values
69
80
 
70
81
  # Description for the workflow.
71
82
  sig { returns(T.nilable(String)) }
@@ -77,40 +88,35 @@ module Deeprails
77
88
  # Max. number of improvement action retries until a given event passes the
78
89
  # guardrails. Defaults to 10.
79
90
  sig { returns(T.nilable(Integer)) }
80
- attr_reader :max_retries
91
+ attr_reader :max_improvement_attempt
81
92
 
82
- sig { params(max_retries: Integer).void }
83
- attr_writer :max_retries
93
+ sig { params(max_improvement_attempt: Integer).void }
94
+ attr_writer :max_improvement_attempt
84
95
 
85
96
  sig do
86
97
  params(
87
98
  improvement_action:
88
- T.nilable(
89
- Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
90
- ),
91
- metrics: T::Hash[Symbol, Float],
99
+ Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol,
92
100
  name: String,
93
101
  type: Deeprails::DefendCreateWorkflowParams::Type::OrSymbol,
94
- automatic_tolerance:
95
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::OrSymbol,
102
+ automatic_hallucination_tolerance_levels:
103
+ T::Hash[
104
+ Symbol,
105
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
106
+ ],
107
+ custom_hallucination_threshold_values: T::Hash[Symbol, Float],
96
108
  description: String,
97
- max_retries: Integer,
109
+ max_improvement_attempt: Integer,
98
110
  request_options: Deeprails::RequestOptions::OrHash
99
111
  ).returns(T.attached_class)
100
112
  end
101
113
  def self.new(
102
114
  # The action used to improve outputs that fail one or more guardrail metrics for the
103
- # workflow events. May be `regenerate`, `fixit`, or null which represents “do
104
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
105
- # Fixit attempts to directly address the shortcomings of the output using the
106
- # guardrail failure rationale. Do nothing does not attempt any improvement.
115
+ # workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
116
+ # input prompt with minor induced variance. FixIt attempts to directly address the
117
+ # shortcomings of the output using the guardrail failure rationale. Do Nothing
118
+ # does not attempt any improvement.
107
119
  improvement_action:,
108
- # Mapping of guardrail metrics to floating point threshold values. If the workflow
109
- # type is automatic, only the metric names are used (`automatic_tolerance`
110
- # determines thresholds). Possible metrics are `correctness`, `completeness`,
111
- # `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
112
- # `comprehensive_safety`.
113
- metrics:,
114
120
  # Name of the workflow.
115
121
  name:,
116
122
  # Type of thresholds to use for the workflow, either `automatic` or `custom`.
@@ -119,14 +125,20 @@ module Deeprails
119
125
  # set the threshold for each metric as a floating point number between 0.0 and
120
126
  # 1.0.
121
127
  type:,
122
- # Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
123
- # `high`. Ignored if `type` is `custom`.
124
- automatic_tolerance: nil,
128
+ # Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
129
+ # `medium`, or `high`). Possible metrics are `completeness`,
130
+ # `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
131
+ # `comprehensive_safety`.
132
+ automatic_hallucination_tolerance_levels: nil,
133
+ # Mapping of guardrail metrics to floating point threshold values. Possible
134
+ # metrics are `correctness`, `completeness`, `instruction_adherence`,
135
+ # `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
136
+ custom_hallucination_threshold_values: nil,
125
137
  # Description for the workflow.
126
138
  description: nil,
127
139
  # Max. number of improvement action retries until a given event passes the
128
140
  # guardrails. Defaults to 10.
129
- max_retries: nil,
141
+ max_improvement_attempt: nil,
130
142
  request_options: {}
131
143
  )
132
144
  end
@@ -135,16 +147,17 @@ module Deeprails
135
147
  override.returns(
136
148
  {
137
149
  improvement_action:
138
- T.nilable(
139
- Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
140
- ),
141
- metrics: T::Hash[Symbol, Float],
150
+ Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol,
142
151
  name: String,
143
152
  type: Deeprails::DefendCreateWorkflowParams::Type::OrSymbol,
144
- automatic_tolerance:
145
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::OrSymbol,
153
+ automatic_hallucination_tolerance_levels:
154
+ T::Hash[
155
+ Symbol,
156
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
157
+ ],
158
+ custom_hallucination_threshold_values: T::Hash[Symbol, Float],
146
159
  description: String,
147
- max_retries: Integer,
160
+ max_improvement_attempt: Integer,
148
161
  request_options: Deeprails::RequestOptions
149
162
  }
150
163
  )
@@ -153,10 +166,10 @@ module Deeprails
153
166
  end
154
167
 
155
168
  # The action used to improve outputs that fail one or more guardrail metrics for the
156
- # workflow events. May be `regenerate`, `fixit`, or null which represents “do
157
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
158
- # Fixit attempts to directly address the shortcomings of the output using the
159
- # guardrail failure rationale. Do nothing does not attempt any improvement.
169
+ # workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
170
+ # input prompt with minor induced variance. FixIt attempts to directly address the
171
+ # shortcomings of the output using the guardrail failure rationale. Do Nothing
172
+ # does not attempt any improvement.
160
173
  module ImprovementAction
161
174
  extend Deeprails::Internal::Type::Enum
162
175
 
@@ -169,9 +182,9 @@ module Deeprails
169
182
  end
170
183
  OrSymbol = T.type_alias { T.any(Symbol, String) }
171
184
 
172
- REGENERATE =
185
+ REGEN =
173
186
  T.let(
174
- :regenerate,
187
+ :regen,
175
188
  Deeprails::DefendCreateWorkflowParams::ImprovementAction::TaggedSymbol
176
189
  )
177
190
  FIXIT =
@@ -179,6 +192,11 @@ module Deeprails
179
192
  :fixit,
180
193
  Deeprails::DefendCreateWorkflowParams::ImprovementAction::TaggedSymbol
181
194
  )
195
+ DO_NOTHING =
196
+ T.let(
197
+ :do_nothing,
198
+ Deeprails::DefendCreateWorkflowParams::ImprovementAction::TaggedSymbol
199
+ )
182
200
 
183
201
  sig do
184
202
  override.returns(
@@ -225,16 +243,14 @@ module Deeprails
225
243
  end
226
244
  end
227
245
 
228
- # Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
229
- # `high`. Ignored if `type` is `custom`.
230
- module AutomaticTolerance
246
+ module AutomaticHallucinationToleranceLevel
231
247
  extend Deeprails::Internal::Type::Enum
232
248
 
233
249
  TaggedSymbol =
234
250
  T.type_alias do
235
251
  T.all(
236
252
  Symbol,
237
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance
253
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel
238
254
  )
239
255
  end
240
256
  OrSymbol = T.type_alias { T.any(Symbol, String) }
@@ -242,23 +258,23 @@ module Deeprails
242
258
  LOW =
243
259
  T.let(
244
260
  :low,
245
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::TaggedSymbol
261
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
246
262
  )
247
263
  MEDIUM =
248
264
  T.let(
249
265
  :medium,
250
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::TaggedSymbol
266
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
251
267
  )
252
268
  HIGH =
253
269
  T.let(
254
270
  :high,
255
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::TaggedSymbol
271
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
256
272
  )
257
273
 
258
274
  sig do
259
275
  override.returns(
260
276
  T::Array[
261
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::TaggedSymbol
277
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
262
278
  ]
263
279
  )
264
280
  end
@@ -31,24 +31,32 @@ module Deeprails
31
31
  attr_writer :description
32
32
 
33
33
  # The action used to improve outputs that fail one or more guardrail metrics for
34
- # the workflow events. May be `regenerate`, `fixit`, or null which represents “do
35
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
36
- # Fixit attempts to directly address the shortcomings of the output using the
37
- # guardrail failure rationale. Do nothing does not attempt any improvement.
34
+ # the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
35
+ # user's input prompt with minor induced variance. FixIt attempts to directly
36
+ # address the shortcomings of the output using the guardrail failure rationale. Do
37
+ # Nothing does not attempt any improvement.
38
38
  sig do
39
39
  returns(
40
40
  T.nilable(Deeprails::DefendResponse::ImprovementAction::TaggedSymbol)
41
41
  )
42
42
  end
43
- attr_accessor :improvement_action
43
+ attr_reader :improvement_action
44
+
45
+ sig do
46
+ params(
47
+ improvement_action:
48
+ Deeprails::DefendResponse::ImprovementAction::OrSymbol
49
+ ).void
50
+ end
51
+ attr_writer :improvement_action
44
52
 
45
53
  # Max. number of improvement action retries until a given event passes the
46
54
  # guardrails.
47
55
  sig { returns(T.nilable(Integer)) }
48
- attr_reader :max_retries
56
+ attr_reader :max_improvement_attempt
49
57
 
50
- sig { params(max_retries: Integer).void }
51
- attr_writer :max_retries
58
+ sig { params(max_improvement_attempt: Integer).void }
59
+ attr_writer :max_improvement_attempt
52
60
 
53
61
  # The most recent time the workflow was modified in UTC.
54
62
  sig { returns(T.nilable(Time)) }
@@ -57,7 +65,7 @@ module Deeprails
57
65
  sig { params(modified_at: Time).void }
58
66
  attr_writer :modified_at
59
67
 
60
- # Status of the selected workflow. May be `archived` or `active`. Archived
68
+ # Status of the selected workflow. May be `inactive` or `active`. Inactive
61
69
  # workflows will not accept events.
62
70
  sig do
63
71
  returns(T.nilable(Deeprails::DefendResponse::Status::TaggedSymbol))
@@ -81,8 +89,8 @@ module Deeprails
81
89
  created_at: Time,
82
90
  description: String,
83
91
  improvement_action:
84
- T.nilable(Deeprails::DefendResponse::ImprovementAction::OrSymbol),
85
- max_retries: Integer,
92
+ Deeprails::DefendResponse::ImprovementAction::OrSymbol,
93
+ max_improvement_attempt: Integer,
86
94
  modified_at: Time,
87
95
  status: Deeprails::DefendResponse::Status::OrSymbol,
88
96
  success_rate: Float
@@ -98,17 +106,17 @@ module Deeprails
98
106
  # Description for the workflow.
99
107
  description: nil,
100
108
  # The action used to improve outputs that fail one or more guardrail metrics for
101
- # the workflow events. May be `regenerate`, `fixit`, or null which represents “do
102
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
103
- # Fixit attempts to directly address the shortcomings of the output using the
104
- # guardrail failure rationale. Do nothing does not attempt any improvement.
109
+ # the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
110
+ # user's input prompt with minor induced variance. FixIt attempts to directly
111
+ # address the shortcomings of the output using the guardrail failure rationale. Do
112
+ # Nothing does not attempt any improvement.
105
113
  improvement_action: nil,
106
114
  # Max. number of improvement action retries until a given event passes the
107
115
  # guardrails.
108
- max_retries: nil,
116
+ max_improvement_attempt: nil,
109
117
  # The most recent time the workflow was modified in UTC.
110
118
  modified_at: nil,
111
- # Status of the selected workflow. May be `archived` or `active`. Archived
119
+ # Status of the selected workflow. May be `inactive` or `active`. Inactive
112
120
  # workflows will not accept events.
113
121
  status: nil,
114
122
  # Rate of events associated with this workflow that passed evaluation.
@@ -124,10 +132,8 @@ module Deeprails
124
132
  created_at: Time,
125
133
  description: String,
126
134
  improvement_action:
127
- T.nilable(
128
- Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
129
- ),
130
- max_retries: Integer,
135
+ Deeprails::DefendResponse::ImprovementAction::TaggedSymbol,
136
+ max_improvement_attempt: Integer,
131
137
  modified_at: Time,
132
138
  status: Deeprails::DefendResponse::Status::TaggedSymbol,
133
139
  success_rate: Float
@@ -138,10 +144,10 @@ module Deeprails
138
144
  end
139
145
 
140
146
  # The action used to improve outputs that fail one or more guardrail metrics for
141
- # the workflow events. May be `regenerate`, `fixit`, or null which represents “do
142
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
143
- # Fixit attempts to directly address the shortcomings of the output using the
144
- # guardrail failure rationale. Do nothing does not attempt any improvement.
147
+ # the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
148
+ # user's input prompt with minor induced variance. FixIt attempts to directly
149
+ # address the shortcomings of the output using the guardrail failure rationale. Do
150
+ # Nothing does not attempt any improvement.
145
151
  module ImprovementAction
146
152
  extend Deeprails::Internal::Type::Enum
147
153
 
@@ -151,9 +157,9 @@ module Deeprails
151
157
  end
152
158
  OrSymbol = T.type_alias { T.any(Symbol, String) }
153
159
 
154
- REGENERATE =
160
+ REGEN =
155
161
  T.let(
156
- :regenerate,
162
+ :regen,
157
163
  Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
158
164
  )
159
165
  FIXIT =
@@ -161,6 +167,11 @@ module Deeprails
161
167
  :fixit,
162
168
  Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
163
169
  )
170
+ DO_NOTHING =
171
+ T.let(
172
+ :do_nothing,
173
+ Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
174
+ )
164
175
 
165
176
  sig do
166
177
  override.returns(
@@ -171,7 +182,7 @@ module Deeprails
171
182
  end
172
183
  end
173
184
 
174
- # Status of the selected workflow. May be `archived` or `active`. Archived
185
+ # Status of the selected workflow. May be `inactive` or `active`. Inactive
175
186
  # workflows will not accept events.
176
187
  module Status
177
188
  extend Deeprails::Internal::Type::Enum
@@ -180,8 +191,8 @@ module Deeprails
180
191
  T.type_alias { T.all(Symbol, Deeprails::DefendResponse::Status) }
181
192
  OrSymbol = T.type_alias { T.any(Symbol, String) }
182
193
 
183
- ARCHIVED =
184
- T.let(:archived, Deeprails::DefendResponse::Status::TaggedSymbol)
194
+ INACTIVE =
195
+ T.let(:inactive, Deeprails::DefendResponse::Status::TaggedSymbol)
185
196
  ACTIVE = T.let(:active, Deeprails::DefendResponse::Status::TaggedSymbol)
186
197
 
187
198
  sig do
@@ -15,8 +15,8 @@ module Deeprails
15
15
  end
16
16
 
17
17
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
18
- # contain at least one of `user_prompt` or `system_prompt`. For
19
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
18
+ # contain at least a `user_prompt` or `system_prompt` field. For the
19
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
20
20
  sig { returns(Deeprails::DefendSubmitEventParams::ModelInput) }
21
21
  attr_reader :model_input
22
22
 
@@ -61,8 +61,8 @@ module Deeprails
61
61
  end
62
62
  def self.new(
63
63
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
64
- # contain at least one of `user_prompt` or `system_prompt`. For
65
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
64
+ # contain at least a `user_prompt` or `system_prompt` field. For the
65
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
66
66
  model_input:,
67
67
  # Output generated by the LLM to be evaluated.
68
68
  model_output:,
@@ -125,8 +125,8 @@ module Deeprails
125
125
  attr_writer :user_prompt
126
126
 
127
127
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
128
- # contain at least one of `user_prompt` or `system_prompt`. For
129
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
128
+ # contain at least a `user_prompt` or `system_prompt` field. For the
129
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
130
130
  sig do
131
131
  params(
132
132
  ground_truth: String,
@@ -12,8 +12,8 @@ module Deeprails
12
12
  end
13
13
 
14
14
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
15
- # contain at least one of `user_prompt` or `system_prompt`. For
16
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
15
+ # contain at least a `user_prompt` or `system_prompt` field. For
16
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
17
17
  sig { returns(Deeprails::EvaluateCreateParams::ModelInput) }
18
18
  attr_reader :model_input
19
19
 
@@ -85,8 +85,8 @@ module Deeprails
85
85
  end
86
86
  def self.new(
87
87
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
88
- # contain at least one of `user_prompt` or `system_prompt`. For
89
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
88
+ # contain at least a `user_prompt` or `system_prompt` field. For
89
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
90
90
  model_input:,
91
91
  # Output generated by the LLM to be evaluated.
92
92
  model_output:,
@@ -157,8 +157,8 @@ module Deeprails
157
157
  attr_writer :user_prompt
158
158
 
159
159
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
160
- # contain at least one of `user_prompt` or `system_prompt`. For
161
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
160
+ # contain at least a `user_prompt` or `system_prompt` field. For
161
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
162
162
  sig do
163
163
  params(
164
164
  ground_truth: String,
@@ -17,8 +17,8 @@ module Deeprails
17
17
  attr_accessor :evaluation_status
18
18
 
19
19
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
20
- # contain at least one of `user_prompt` or `system_prompt`. For
21
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
20
+ # contain at least a `user_prompt` or `system_prompt` field. For
21
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
22
22
  sig { returns(Deeprails::Evaluation::ModelInput) }
23
23
  attr_reader :model_input
24
24
 
@@ -162,8 +162,8 @@ module Deeprails
162
162
  # Status of the evaluation.
163
163
  evaluation_status:,
164
164
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
165
- # contain at least one of `user_prompt` or `system_prompt`. For
166
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
165
+ # contain at least a `user_prompt` or `system_prompt` field. For
166
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
167
167
  model_input:,
168
168
  # Output generated by the LLM to be evaluated.
169
169
  model_output:,
@@ -298,8 +298,8 @@ module Deeprails
298
298
  attr_writer :user_prompt
299
299
 
300
300
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
301
- # contain at least one of `user_prompt` or `system_prompt`. For
302
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
301
+ # contain at least a `user_prompt` or `system_prompt` field. For
302
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
303
303
  sig do
304
304
  params(
305
305
  ground_truth: String,
@@ -28,8 +28,8 @@ module Deeprails
28
28
  attr_accessor :guardrail_metrics
29
29
 
30
30
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
31
- # contain at least one of `user_prompt` or `system_prompt`. For
32
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
31
+ # contain at least a `user_prompt` or `system_prompt` field. For
32
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
33
33
  sig { returns(Deeprails::MonitorSubmitEventParams::ModelInput) }
34
34
  attr_reader :model_input
35
35
 
@@ -97,8 +97,8 @@ module Deeprails
97
97
  # `ground_truth_adherence`, and/or `comprehensive_safety`.
98
98
  guardrail_metrics:,
99
99
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
100
- # contain at least one of `user_prompt` or `system_prompt`. For
101
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
100
+ # contain at least a `user_prompt` or `system_prompt` field. For
101
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
102
102
  model_input:,
103
103
  # Output generated by the LLM to be evaluated.
104
104
  model_output:,
@@ -216,8 +216,8 @@ module Deeprails
216
216
  attr_writer :user_prompt
217
217
 
218
218
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
219
- # contain at least one of `user_prompt` or `system_prompt`. For
220
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
219
+ # contain at least a `user_prompt` or `system_prompt` field. For
220
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
221
221
  sig do
222
222
  params(
223
223
  ground_truth: String,
@@ -8,32 +8,27 @@ module Deeprails
8
8
  sig do
9
9
  params(
10
10
  improvement_action:
11
- T.nilable(
12
- Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
13
- ),
14
- metrics: T::Hash[Symbol, Float],
11
+ Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol,
15
12
  name: String,
16
13
  type: Deeprails::DefendCreateWorkflowParams::Type::OrSymbol,
17
- automatic_tolerance:
18
- Deeprails::DefendCreateWorkflowParams::AutomaticTolerance::OrSymbol,
14
+ automatic_hallucination_tolerance_levels:
15
+ T::Hash[
16
+ Symbol,
17
+ Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
18
+ ],
19
+ custom_hallucination_threshold_values: T::Hash[Symbol, Float],
19
20
  description: String,
20
- max_retries: Integer,
21
+ max_improvement_attempt: Integer,
21
22
  request_options: Deeprails::RequestOptions::OrHash
22
23
  ).returns(Deeprails::DefendResponse)
23
24
  end
24
25
  def create_workflow(
25
26
  # The action used to improve outputs that fail one or more guardrail metrics for the
26
- # workflow events. May be `regenerate`, `fixit`, or null which represents “do
27
- # nothing”. Regenerate runs the user's input prompt with minor induced variance.
28
- # Fixit attempts to directly address the shortcomings of the output using the
29
- # guardrail failure rationale. Do nothing does not attempt any improvement.
27
+ # workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
28
+ # input prompt with minor induced variance. FixIt attempts to directly address the
29
+ # shortcomings of the output using the guardrail failure rationale. Do Nothing
30
+ # does not attempt any improvement.
30
31
  improvement_action:,
31
- # Mapping of guardrail metrics to floating point threshold values. If the workflow
32
- # type is automatic, only the metric names are used (`automatic_tolerance`
33
- # determines thresholds). Possible metrics are `correctness`, `completeness`,
34
- # `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
35
- # `comprehensive_safety`.
36
- metrics:,
37
32
  # Name of the workflow.
38
33
  name:,
39
34
  # Type of thresholds to use for the workflow, either `automatic` or `custom`.
@@ -42,14 +37,20 @@ module Deeprails
42
37
  # set the threshold for each metric as a floating point number between 0.0 and
43
38
  # 1.0.
44
39
  type:,
45
- # Hallucination tolerance for automatic workflows; may be `low`, `medium`, or
46
- # `high`. Ignored if `type` is `custom`.
47
- automatic_tolerance: nil,
40
+ # Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
41
+ # `medium`, or `high`). Possible metrics are `completeness`,
42
+ # `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
43
+ # `comprehensive_safety`.
44
+ automatic_hallucination_tolerance_levels: nil,
45
+ # Mapping of guardrail metrics to floating point threshold values. Possible
46
+ # metrics are `correctness`, `completeness`, `instruction_adherence`,
47
+ # `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
48
+ custom_hallucination_threshold_values: nil,
48
49
  # Description for the workflow.
49
50
  description: nil,
50
51
  # Max. number of improvement action retries until a given event passes the
51
52
  # guardrails. Defaults to 10.
52
- max_retries: nil,
53
+ max_improvement_attempt: nil,
53
54
  request_options: {}
54
55
  )
55
56
  end
@@ -102,8 +103,8 @@ module Deeprails
102
103
  # Workflow ID associated with this event.
103
104
  workflow_id,
104
105
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
105
- # contain at least one of `user_prompt` or `system_prompt`. For
106
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
106
+ # contain at least a `user_prompt` or `system_prompt` field. For the
107
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
107
108
  model_input:,
108
109
  # Output generated by the LLM to be evaluated.
109
110
  model_output:,
@@ -21,8 +21,8 @@ module Deeprails
21
21
  end
22
22
  def create(
23
23
  # A dictionary of inputs sent to the LLM to generate output. The dictionary must
24
- # contain at least one of `user_prompt` or `system_prompt`. For
25
- # ground_truth_aherence guadrail metric, `ground_truth` should be provided.
24
+ # contain at least a `user_prompt` or `system_prompt` field. For
25
+ # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
26
26
  model_input:,
27
27
  # Output generated by the LLM to be evaluated.
28
28
  model_output:,