deeprails 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +10 -34
- data/lib/deeprails/models/defend_create_workflow_params.rb +37 -39
- data/lib/deeprails/models/defend_response.rb +20 -19
- data/lib/deeprails/models/defend_submit_event_params.rb +2 -2
- data/lib/deeprails/models/evaluate_create_params.rb +2 -2
- data/lib/deeprails/models/evaluation.rb +2 -2
- data/lib/deeprails/models/monitor_submit_event_params.rb +2 -2
- data/lib/deeprails/resources/defend.rb +6 -6
- data/lib/deeprails/version.rb +1 -1
- data/rbi/deeprails/models/defend_create_workflow_params.rbi +83 -67
- data/rbi/deeprails/models/defend_response.rbi +41 -30
- data/rbi/deeprails/models/defend_submit_event_params.rbi +3 -3
- data/rbi/deeprails/models/evaluate_create_params.rbi +3 -3
- data/rbi/deeprails/models/evaluation.rbi +3 -3
- data/rbi/deeprails/models/monitor_submit_event_params.rbi +3 -3
- data/rbi/deeprails/resources/defend.rbi +23 -22
- data/rbi/deeprails/resources/evaluate.rbi +1 -1
- data/rbi/deeprails/resources/monitor.rbi +1 -1
- data/sig/deeprails/models/defend_create_workflow_params.rbs +31 -26
- data/sig/deeprails/models/defend_response.rbs +18 -13
- data/sig/deeprails/resources/defend.rbs +4 -4
- metadata +2 -2
|
@@ -15,27 +15,17 @@ module Deeprails
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
# The action used to improve outputs that fail one or more guardrail metrics for the
|
|
18
|
-
# workflow events. May be `
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
#
|
|
18
|
+
# workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
|
|
19
|
+
# input prompt with minor induced variance. FixIt attempts to directly address the
|
|
20
|
+
# shortcomings of the output using the guardrail failure rationale. Do Nothing
|
|
21
|
+
# does not attempt any improvement.
|
|
22
22
|
sig do
|
|
23
23
|
returns(
|
|
24
|
-
|
|
25
|
-
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
|
|
26
|
-
)
|
|
24
|
+
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
|
|
27
25
|
)
|
|
28
26
|
end
|
|
29
27
|
attr_accessor :improvement_action
|
|
30
28
|
|
|
31
|
-
# Mapping of guardrail metrics to floating point threshold values. If the workflow
|
|
32
|
-
# type is automatic, only the metric names are used (`automatic_tolerance`
|
|
33
|
-
# determines thresholds). Possible metrics are `correctness`, `completeness`,
|
|
34
|
-
# `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
|
|
35
|
-
# `comprehensive_safety`.
|
|
36
|
-
sig { returns(T::Hash[Symbol, Float]) }
|
|
37
|
-
attr_accessor :metrics
|
|
38
|
-
|
|
39
29
|
# Name of the workflow.
|
|
40
30
|
sig { returns(String) }
|
|
41
31
|
attr_accessor :name
|
|
@@ -48,24 +38,45 @@ module Deeprails
|
|
|
48
38
|
sig { returns(Deeprails::DefendCreateWorkflowParams::Type::OrSymbol) }
|
|
49
39
|
attr_accessor :type
|
|
50
40
|
|
|
51
|
-
#
|
|
52
|
-
# `
|
|
41
|
+
# Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
|
|
42
|
+
# `medium`, or `high`). Possible metrics are `completeness`,
|
|
43
|
+
# `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
|
|
44
|
+
# `comprehensive_safety`.
|
|
53
45
|
sig do
|
|
54
46
|
returns(
|
|
55
47
|
T.nilable(
|
|
56
|
-
|
|
48
|
+
T::Hash[
|
|
49
|
+
Symbol,
|
|
50
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
|
|
51
|
+
]
|
|
57
52
|
)
|
|
58
53
|
)
|
|
59
54
|
end
|
|
60
|
-
attr_reader :
|
|
55
|
+
attr_reader :automatic_hallucination_tolerance_levels
|
|
61
56
|
|
|
62
57
|
sig do
|
|
63
58
|
params(
|
|
64
|
-
|
|
65
|
-
|
|
59
|
+
automatic_hallucination_tolerance_levels:
|
|
60
|
+
T::Hash[
|
|
61
|
+
Symbol,
|
|
62
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
|
|
63
|
+
]
|
|
66
64
|
).void
|
|
67
65
|
end
|
|
68
|
-
attr_writer :
|
|
66
|
+
attr_writer :automatic_hallucination_tolerance_levels
|
|
67
|
+
|
|
68
|
+
# Mapping of guardrail metrics to floating point threshold values. Possible
|
|
69
|
+
# metrics are `correctness`, `completeness`, `instruction_adherence`,
|
|
70
|
+
# `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
|
|
71
|
+
sig { returns(T.nilable(T::Hash[Symbol, Float])) }
|
|
72
|
+
attr_reader :custom_hallucination_threshold_values
|
|
73
|
+
|
|
74
|
+
sig do
|
|
75
|
+
params(
|
|
76
|
+
custom_hallucination_threshold_values: T::Hash[Symbol, Float]
|
|
77
|
+
).void
|
|
78
|
+
end
|
|
79
|
+
attr_writer :custom_hallucination_threshold_values
|
|
69
80
|
|
|
70
81
|
# Description for the workflow.
|
|
71
82
|
sig { returns(T.nilable(String)) }
|
|
@@ -77,40 +88,35 @@ module Deeprails
|
|
|
77
88
|
# Max. number of improvement action retries until a given event passes the
|
|
78
89
|
# guardrails. Defaults to 10.
|
|
79
90
|
sig { returns(T.nilable(Integer)) }
|
|
80
|
-
attr_reader :
|
|
91
|
+
attr_reader :max_improvement_attempt
|
|
81
92
|
|
|
82
|
-
sig { params(
|
|
83
|
-
attr_writer :
|
|
93
|
+
sig { params(max_improvement_attempt: Integer).void }
|
|
94
|
+
attr_writer :max_improvement_attempt
|
|
84
95
|
|
|
85
96
|
sig do
|
|
86
97
|
params(
|
|
87
98
|
improvement_action:
|
|
88
|
-
|
|
89
|
-
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
|
|
90
|
-
),
|
|
91
|
-
metrics: T::Hash[Symbol, Float],
|
|
99
|
+
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol,
|
|
92
100
|
name: String,
|
|
93
101
|
type: Deeprails::DefendCreateWorkflowParams::Type::OrSymbol,
|
|
94
|
-
|
|
95
|
-
|
|
102
|
+
automatic_hallucination_tolerance_levels:
|
|
103
|
+
T::Hash[
|
|
104
|
+
Symbol,
|
|
105
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
|
|
106
|
+
],
|
|
107
|
+
custom_hallucination_threshold_values: T::Hash[Symbol, Float],
|
|
96
108
|
description: String,
|
|
97
|
-
|
|
109
|
+
max_improvement_attempt: Integer,
|
|
98
110
|
request_options: Deeprails::RequestOptions::OrHash
|
|
99
111
|
).returns(T.attached_class)
|
|
100
112
|
end
|
|
101
113
|
def self.new(
|
|
102
114
|
# The action used to improve outputs that fail one or more guardrail metrics for the
|
|
103
|
-
# workflow events. May be `
|
|
104
|
-
#
|
|
105
|
-
#
|
|
106
|
-
#
|
|
115
|
+
# workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
|
|
116
|
+
# input prompt with minor induced variance. FixIt attempts to directly address the
|
|
117
|
+
# shortcomings of the output using the guardrail failure rationale. Do Nothing
|
|
118
|
+
# does not attempt any improvement.
|
|
107
119
|
improvement_action:,
|
|
108
|
-
# Mapping of guardrail metrics to floating point threshold values. If the workflow
|
|
109
|
-
# type is automatic, only the metric names are used (`automatic_tolerance`
|
|
110
|
-
# determines thresholds). Possible metrics are `correctness`, `completeness`,
|
|
111
|
-
# `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
|
|
112
|
-
# `comprehensive_safety`.
|
|
113
|
-
metrics:,
|
|
114
120
|
# Name of the workflow.
|
|
115
121
|
name:,
|
|
116
122
|
# Type of thresholds to use for the workflow, either `automatic` or `custom`.
|
|
@@ -119,14 +125,20 @@ module Deeprails
|
|
|
119
125
|
# set the threshold for each metric as a floating point number between 0.0 and
|
|
120
126
|
# 1.0.
|
|
121
127
|
type:,
|
|
122
|
-
#
|
|
123
|
-
# `
|
|
124
|
-
|
|
128
|
+
# Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
|
|
129
|
+
# `medium`, or `high`). Possible metrics are `completeness`,
|
|
130
|
+
# `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
|
|
131
|
+
# `comprehensive_safety`.
|
|
132
|
+
automatic_hallucination_tolerance_levels: nil,
|
|
133
|
+
# Mapping of guardrail metrics to floating point threshold values. Possible
|
|
134
|
+
# metrics are `correctness`, `completeness`, `instruction_adherence`,
|
|
135
|
+
# `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
|
|
136
|
+
custom_hallucination_threshold_values: nil,
|
|
125
137
|
# Description for the workflow.
|
|
126
138
|
description: nil,
|
|
127
139
|
# Max. number of improvement action retries until a given event passes the
|
|
128
140
|
# guardrails. Defaults to 10.
|
|
129
|
-
|
|
141
|
+
max_improvement_attempt: nil,
|
|
130
142
|
request_options: {}
|
|
131
143
|
)
|
|
132
144
|
end
|
|
@@ -135,16 +147,17 @@ module Deeprails
|
|
|
135
147
|
override.returns(
|
|
136
148
|
{
|
|
137
149
|
improvement_action:
|
|
138
|
-
|
|
139
|
-
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
|
|
140
|
-
),
|
|
141
|
-
metrics: T::Hash[Symbol, Float],
|
|
150
|
+
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol,
|
|
142
151
|
name: String,
|
|
143
152
|
type: Deeprails::DefendCreateWorkflowParams::Type::OrSymbol,
|
|
144
|
-
|
|
145
|
-
|
|
153
|
+
automatic_hallucination_tolerance_levels:
|
|
154
|
+
T::Hash[
|
|
155
|
+
Symbol,
|
|
156
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
|
|
157
|
+
],
|
|
158
|
+
custom_hallucination_threshold_values: T::Hash[Symbol, Float],
|
|
146
159
|
description: String,
|
|
147
|
-
|
|
160
|
+
max_improvement_attempt: Integer,
|
|
148
161
|
request_options: Deeprails::RequestOptions
|
|
149
162
|
}
|
|
150
163
|
)
|
|
@@ -153,10 +166,10 @@ module Deeprails
|
|
|
153
166
|
end
|
|
154
167
|
|
|
155
168
|
# The action used to improve outputs that fail one or more guardrail metrics for the
|
|
156
|
-
# workflow events. May be `
|
|
157
|
-
#
|
|
158
|
-
#
|
|
159
|
-
#
|
|
169
|
+
# workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
|
|
170
|
+
# input prompt with minor induced variance. FixIt attempts to directly address the
|
|
171
|
+
# shortcomings of the output using the guardrail failure rationale. Do Nothing
|
|
172
|
+
# does not attempt any improvement.
|
|
160
173
|
module ImprovementAction
|
|
161
174
|
extend Deeprails::Internal::Type::Enum
|
|
162
175
|
|
|
@@ -169,9 +182,9 @@ module Deeprails
|
|
|
169
182
|
end
|
|
170
183
|
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
171
184
|
|
|
172
|
-
|
|
185
|
+
REGEN =
|
|
173
186
|
T.let(
|
|
174
|
-
:
|
|
187
|
+
:regen,
|
|
175
188
|
Deeprails::DefendCreateWorkflowParams::ImprovementAction::TaggedSymbol
|
|
176
189
|
)
|
|
177
190
|
FIXIT =
|
|
@@ -179,6 +192,11 @@ module Deeprails
|
|
|
179
192
|
:fixit,
|
|
180
193
|
Deeprails::DefendCreateWorkflowParams::ImprovementAction::TaggedSymbol
|
|
181
194
|
)
|
|
195
|
+
DO_NOTHING =
|
|
196
|
+
T.let(
|
|
197
|
+
:do_nothing,
|
|
198
|
+
Deeprails::DefendCreateWorkflowParams::ImprovementAction::TaggedSymbol
|
|
199
|
+
)
|
|
182
200
|
|
|
183
201
|
sig do
|
|
184
202
|
override.returns(
|
|
@@ -225,16 +243,14 @@ module Deeprails
|
|
|
225
243
|
end
|
|
226
244
|
end
|
|
227
245
|
|
|
228
|
-
|
|
229
|
-
# `high`. Ignored if `type` is `custom`.
|
|
230
|
-
module AutomaticTolerance
|
|
246
|
+
module AutomaticHallucinationToleranceLevel
|
|
231
247
|
extend Deeprails::Internal::Type::Enum
|
|
232
248
|
|
|
233
249
|
TaggedSymbol =
|
|
234
250
|
T.type_alias do
|
|
235
251
|
T.all(
|
|
236
252
|
Symbol,
|
|
237
|
-
Deeprails::DefendCreateWorkflowParams::
|
|
253
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel
|
|
238
254
|
)
|
|
239
255
|
end
|
|
240
256
|
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
@@ -242,23 +258,23 @@ module Deeprails
|
|
|
242
258
|
LOW =
|
|
243
259
|
T.let(
|
|
244
260
|
:low,
|
|
245
|
-
Deeprails::DefendCreateWorkflowParams::
|
|
261
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
|
|
246
262
|
)
|
|
247
263
|
MEDIUM =
|
|
248
264
|
T.let(
|
|
249
265
|
:medium,
|
|
250
|
-
Deeprails::DefendCreateWorkflowParams::
|
|
266
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
|
|
251
267
|
)
|
|
252
268
|
HIGH =
|
|
253
269
|
T.let(
|
|
254
270
|
:high,
|
|
255
|
-
Deeprails::DefendCreateWorkflowParams::
|
|
271
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
|
|
256
272
|
)
|
|
257
273
|
|
|
258
274
|
sig do
|
|
259
275
|
override.returns(
|
|
260
276
|
T::Array[
|
|
261
|
-
Deeprails::DefendCreateWorkflowParams::
|
|
277
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::TaggedSymbol
|
|
262
278
|
]
|
|
263
279
|
)
|
|
264
280
|
end
|
|
@@ -31,24 +31,32 @@ module Deeprails
|
|
|
31
31
|
attr_writer :description
|
|
32
32
|
|
|
33
33
|
# The action used to improve outputs that fail one or more guardrail metrics for
|
|
34
|
-
# the workflow events. May be `
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
#
|
|
34
|
+
# the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
|
|
35
|
+
# user's input prompt with minor induced variance. FixIt attempts to directly
|
|
36
|
+
# address the shortcomings of the output using the guardrail failure rationale. Do
|
|
37
|
+
# Nothing does not attempt any improvement.
|
|
38
38
|
sig do
|
|
39
39
|
returns(
|
|
40
40
|
T.nilable(Deeprails::DefendResponse::ImprovementAction::TaggedSymbol)
|
|
41
41
|
)
|
|
42
42
|
end
|
|
43
|
-
|
|
43
|
+
attr_reader :improvement_action
|
|
44
|
+
|
|
45
|
+
sig do
|
|
46
|
+
params(
|
|
47
|
+
improvement_action:
|
|
48
|
+
Deeprails::DefendResponse::ImprovementAction::OrSymbol
|
|
49
|
+
).void
|
|
50
|
+
end
|
|
51
|
+
attr_writer :improvement_action
|
|
44
52
|
|
|
45
53
|
# Max. number of improvement action retries until a given event passes the
|
|
46
54
|
# guardrails.
|
|
47
55
|
sig { returns(T.nilable(Integer)) }
|
|
48
|
-
attr_reader :
|
|
56
|
+
attr_reader :max_improvement_attempt
|
|
49
57
|
|
|
50
|
-
sig { params(
|
|
51
|
-
attr_writer :
|
|
58
|
+
sig { params(max_improvement_attempt: Integer).void }
|
|
59
|
+
attr_writer :max_improvement_attempt
|
|
52
60
|
|
|
53
61
|
# The most recent time the workflow was modified in UTC.
|
|
54
62
|
sig { returns(T.nilable(Time)) }
|
|
@@ -57,7 +65,7 @@ module Deeprails
|
|
|
57
65
|
sig { params(modified_at: Time).void }
|
|
58
66
|
attr_writer :modified_at
|
|
59
67
|
|
|
60
|
-
# Status of the selected workflow. May be `
|
|
68
|
+
# Status of the selected workflow. May be `inactive` or `active`. Inactive
|
|
61
69
|
# workflows will not accept events.
|
|
62
70
|
sig do
|
|
63
71
|
returns(T.nilable(Deeprails::DefendResponse::Status::TaggedSymbol))
|
|
@@ -81,8 +89,8 @@ module Deeprails
|
|
|
81
89
|
created_at: Time,
|
|
82
90
|
description: String,
|
|
83
91
|
improvement_action:
|
|
84
|
-
|
|
85
|
-
|
|
92
|
+
Deeprails::DefendResponse::ImprovementAction::OrSymbol,
|
|
93
|
+
max_improvement_attempt: Integer,
|
|
86
94
|
modified_at: Time,
|
|
87
95
|
status: Deeprails::DefendResponse::Status::OrSymbol,
|
|
88
96
|
success_rate: Float
|
|
@@ -98,17 +106,17 @@ module Deeprails
|
|
|
98
106
|
# Description for the workflow.
|
|
99
107
|
description: nil,
|
|
100
108
|
# The action used to improve outputs that fail one or more guardrail metrics for
|
|
101
|
-
# the workflow events. May be `
|
|
102
|
-
#
|
|
103
|
-
#
|
|
104
|
-
#
|
|
109
|
+
# the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
|
|
110
|
+
# user's input prompt with minor induced variance. FixIt attempts to directly
|
|
111
|
+
# address the shortcomings of the output using the guardrail failure rationale. Do
|
|
112
|
+
# Nothing does not attempt any improvement.
|
|
105
113
|
improvement_action: nil,
|
|
106
114
|
# Max. number of improvement action retries until a given event passes the
|
|
107
115
|
# guardrails.
|
|
108
|
-
|
|
116
|
+
max_improvement_attempt: nil,
|
|
109
117
|
# The most recent time the workflow was modified in UTC.
|
|
110
118
|
modified_at: nil,
|
|
111
|
-
# Status of the selected workflow. May be `
|
|
119
|
+
# Status of the selected workflow. May be `inactive` or `active`. Inactive
|
|
112
120
|
# workflows will not accept events.
|
|
113
121
|
status: nil,
|
|
114
122
|
# Rate of events associated with this workflow that passed evaluation.
|
|
@@ -124,10 +132,8 @@ module Deeprails
|
|
|
124
132
|
created_at: Time,
|
|
125
133
|
description: String,
|
|
126
134
|
improvement_action:
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
),
|
|
130
|
-
max_retries: Integer,
|
|
135
|
+
Deeprails::DefendResponse::ImprovementAction::TaggedSymbol,
|
|
136
|
+
max_improvement_attempt: Integer,
|
|
131
137
|
modified_at: Time,
|
|
132
138
|
status: Deeprails::DefendResponse::Status::TaggedSymbol,
|
|
133
139
|
success_rate: Float
|
|
@@ -138,10 +144,10 @@ module Deeprails
|
|
|
138
144
|
end
|
|
139
145
|
|
|
140
146
|
# The action used to improve outputs that fail one or more guardrail metrics for
|
|
141
|
-
# the workflow events. May be `
|
|
142
|
-
#
|
|
143
|
-
#
|
|
144
|
-
#
|
|
147
|
+
# the workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the
|
|
148
|
+
# user's input prompt with minor induced variance. FixIt attempts to directly
|
|
149
|
+
# address the shortcomings of the output using the guardrail failure rationale. Do
|
|
150
|
+
# Nothing does not attempt any improvement.
|
|
145
151
|
module ImprovementAction
|
|
146
152
|
extend Deeprails::Internal::Type::Enum
|
|
147
153
|
|
|
@@ -151,9 +157,9 @@ module Deeprails
|
|
|
151
157
|
end
|
|
152
158
|
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
153
159
|
|
|
154
|
-
|
|
160
|
+
REGEN =
|
|
155
161
|
T.let(
|
|
156
|
-
:
|
|
162
|
+
:regen,
|
|
157
163
|
Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
|
|
158
164
|
)
|
|
159
165
|
FIXIT =
|
|
@@ -161,6 +167,11 @@ module Deeprails
|
|
|
161
167
|
:fixit,
|
|
162
168
|
Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
|
|
163
169
|
)
|
|
170
|
+
DO_NOTHING =
|
|
171
|
+
T.let(
|
|
172
|
+
:do_nothing,
|
|
173
|
+
Deeprails::DefendResponse::ImprovementAction::TaggedSymbol
|
|
174
|
+
)
|
|
164
175
|
|
|
165
176
|
sig do
|
|
166
177
|
override.returns(
|
|
@@ -171,7 +182,7 @@ module Deeprails
|
|
|
171
182
|
end
|
|
172
183
|
end
|
|
173
184
|
|
|
174
|
-
# Status of the selected workflow. May be `
|
|
185
|
+
# Status of the selected workflow. May be `inactive` or `active`. Inactive
|
|
175
186
|
# workflows will not accept events.
|
|
176
187
|
module Status
|
|
177
188
|
extend Deeprails::Internal::Type::Enum
|
|
@@ -180,8 +191,8 @@ module Deeprails
|
|
|
180
191
|
T.type_alias { T.all(Symbol, Deeprails::DefendResponse::Status) }
|
|
181
192
|
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
182
193
|
|
|
183
|
-
|
|
184
|
-
T.let(:
|
|
194
|
+
INACTIVE =
|
|
195
|
+
T.let(:inactive, Deeprails::DefendResponse::Status::TaggedSymbol)
|
|
185
196
|
ACTIVE = T.let(:active, Deeprails::DefendResponse::Status::TaggedSymbol)
|
|
186
197
|
|
|
187
198
|
sig do
|
|
@@ -15,7 +15,7 @@ module Deeprails
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
18
|
-
# contain at least
|
|
18
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
19
19
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
20
20
|
sig { returns(Deeprails::DefendSubmitEventParams::ModelInput) }
|
|
21
21
|
attr_reader :model_input
|
|
@@ -61,7 +61,7 @@ module Deeprails
|
|
|
61
61
|
end
|
|
62
62
|
def self.new(
|
|
63
63
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
64
|
-
# contain at least
|
|
64
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
65
65
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
66
66
|
model_input:,
|
|
67
67
|
# Output generated by the LLM to be evaluated.
|
|
@@ -125,7 +125,7 @@ module Deeprails
|
|
|
125
125
|
attr_writer :user_prompt
|
|
126
126
|
|
|
127
127
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
128
|
-
# contain at least
|
|
128
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
129
129
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
130
130
|
sig do
|
|
131
131
|
params(
|
|
@@ -12,7 +12,7 @@ module Deeprails
|
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
15
|
-
# contain at least
|
|
15
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
16
16
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
17
17
|
sig { returns(Deeprails::EvaluateCreateParams::ModelInput) }
|
|
18
18
|
attr_reader :model_input
|
|
@@ -85,7 +85,7 @@ module Deeprails
|
|
|
85
85
|
end
|
|
86
86
|
def self.new(
|
|
87
87
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
88
|
-
# contain at least
|
|
88
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
89
89
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
90
90
|
model_input:,
|
|
91
91
|
# Output generated by the LLM to be evaluated.
|
|
@@ -157,7 +157,7 @@ module Deeprails
|
|
|
157
157
|
attr_writer :user_prompt
|
|
158
158
|
|
|
159
159
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
160
|
-
# contain at least
|
|
160
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
161
161
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
162
162
|
sig do
|
|
163
163
|
params(
|
|
@@ -17,7 +17,7 @@ module Deeprails
|
|
|
17
17
|
attr_accessor :evaluation_status
|
|
18
18
|
|
|
19
19
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
20
|
-
# contain at least
|
|
20
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
21
21
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
22
22
|
sig { returns(Deeprails::Evaluation::ModelInput) }
|
|
23
23
|
attr_reader :model_input
|
|
@@ -162,7 +162,7 @@ module Deeprails
|
|
|
162
162
|
# Status of the evaluation.
|
|
163
163
|
evaluation_status:,
|
|
164
164
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
165
|
-
# contain at least
|
|
165
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
166
166
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
167
167
|
model_input:,
|
|
168
168
|
# Output generated by the LLM to be evaluated.
|
|
@@ -298,7 +298,7 @@ module Deeprails
|
|
|
298
298
|
attr_writer :user_prompt
|
|
299
299
|
|
|
300
300
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
301
|
-
# contain at least
|
|
301
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
302
302
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
303
303
|
sig do
|
|
304
304
|
params(
|
|
@@ -28,7 +28,7 @@ module Deeprails
|
|
|
28
28
|
attr_accessor :guardrail_metrics
|
|
29
29
|
|
|
30
30
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
31
|
-
# contain at least
|
|
31
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
32
32
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
33
33
|
sig { returns(Deeprails::MonitorSubmitEventParams::ModelInput) }
|
|
34
34
|
attr_reader :model_input
|
|
@@ -97,7 +97,7 @@ module Deeprails
|
|
|
97
97
|
# `ground_truth_adherence`, and/or `comprehensive_safety`.
|
|
98
98
|
guardrail_metrics:,
|
|
99
99
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
100
|
-
# contain at least
|
|
100
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
101
101
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
102
102
|
model_input:,
|
|
103
103
|
# Output generated by the LLM to be evaluated.
|
|
@@ -216,7 +216,7 @@ module Deeprails
|
|
|
216
216
|
attr_writer :user_prompt
|
|
217
217
|
|
|
218
218
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
219
|
-
# contain at least
|
|
219
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
220
220
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
221
221
|
sig do
|
|
222
222
|
params(
|
|
@@ -8,32 +8,27 @@ module Deeprails
|
|
|
8
8
|
sig do
|
|
9
9
|
params(
|
|
10
10
|
improvement_action:
|
|
11
|
-
|
|
12
|
-
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol
|
|
13
|
-
),
|
|
14
|
-
metrics: T::Hash[Symbol, Float],
|
|
11
|
+
Deeprails::DefendCreateWorkflowParams::ImprovementAction::OrSymbol,
|
|
15
12
|
name: String,
|
|
16
13
|
type: Deeprails::DefendCreateWorkflowParams::Type::OrSymbol,
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
automatic_hallucination_tolerance_levels:
|
|
15
|
+
T::Hash[
|
|
16
|
+
Symbol,
|
|
17
|
+
Deeprails::DefendCreateWorkflowParams::AutomaticHallucinationToleranceLevel::OrSymbol
|
|
18
|
+
],
|
|
19
|
+
custom_hallucination_threshold_values: T::Hash[Symbol, Float],
|
|
19
20
|
description: String,
|
|
20
|
-
|
|
21
|
+
max_improvement_attempt: Integer,
|
|
21
22
|
request_options: Deeprails::RequestOptions::OrHash
|
|
22
23
|
).returns(Deeprails::DefendResponse)
|
|
23
24
|
end
|
|
24
25
|
def create_workflow(
|
|
25
26
|
# The action used to improve outputs that fail one or more guardrail metrics for the
|
|
26
|
-
# workflow events. May be `
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
27
|
+
# workflow events. May be `regen`, `fixit`, or `do_nothing`. ReGen runs the user's
|
|
28
|
+
# input prompt with minor induced variance. FixIt attempts to directly address the
|
|
29
|
+
# shortcomings of the output using the guardrail failure rationale. Do Nothing
|
|
30
|
+
# does not attempt any improvement.
|
|
30
31
|
improvement_action:,
|
|
31
|
-
# Mapping of guardrail metrics to floating point threshold values. If the workflow
|
|
32
|
-
# type is automatic, only the metric names are used (`automatic_tolerance`
|
|
33
|
-
# determines thresholds). Possible metrics are `correctness`, `completeness`,
|
|
34
|
-
# `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
|
|
35
|
-
# `comprehensive_safety`.
|
|
36
|
-
metrics:,
|
|
37
32
|
# Name of the workflow.
|
|
38
33
|
name:,
|
|
39
34
|
# Type of thresholds to use for the workflow, either `automatic` or `custom`.
|
|
@@ -42,14 +37,20 @@ module Deeprails
|
|
|
42
37
|
# set the threshold for each metric as a floating point number between 0.0 and
|
|
43
38
|
# 1.0.
|
|
44
39
|
type:,
|
|
45
|
-
#
|
|
46
|
-
# `
|
|
47
|
-
|
|
40
|
+
# Mapping of guardrail metrics to hallucination tolerance levels (either `low`,
|
|
41
|
+
# `medium`, or `high`). Possible metrics are `completeness`,
|
|
42
|
+
# `instruction_adherence`, `context_adherence`, `ground_truth_adherence`, or
|
|
43
|
+
# `comprehensive_safety`.
|
|
44
|
+
automatic_hallucination_tolerance_levels: nil,
|
|
45
|
+
# Mapping of guardrail metrics to floating point threshold values. Possible
|
|
46
|
+
# metrics are `correctness`, `completeness`, `instruction_adherence`,
|
|
47
|
+
# `context_adherence`, `ground_truth_adherence`, or `comprehensive_safety`.
|
|
48
|
+
custom_hallucination_threshold_values: nil,
|
|
48
49
|
# Description for the workflow.
|
|
49
50
|
description: nil,
|
|
50
51
|
# Max. number of improvement action retries until a given event passes the
|
|
51
52
|
# guardrails. Defaults to 10.
|
|
52
|
-
|
|
53
|
+
max_improvement_attempt: nil,
|
|
53
54
|
request_options: {}
|
|
54
55
|
)
|
|
55
56
|
end
|
|
@@ -102,7 +103,7 @@ module Deeprails
|
|
|
102
103
|
# Workflow ID associated with this event.
|
|
103
104
|
workflow_id,
|
|
104
105
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
105
|
-
# contain at least
|
|
106
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
106
107
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
107
108
|
model_input:,
|
|
108
109
|
# Output generated by the LLM to be evaluated.
|
|
@@ -21,7 +21,7 @@ module Deeprails
|
|
|
21
21
|
end
|
|
22
22
|
def create(
|
|
23
23
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
24
|
-
# contain at least
|
|
24
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
25
25
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
26
26
|
model_input:,
|
|
27
27
|
# Output generated by the LLM to be evaluated.
|
|
@@ -91,7 +91,7 @@ module Deeprails
|
|
|
91
91
|
# `ground_truth_adherence`, and/or `comprehensive_safety`.
|
|
92
92
|
guardrail_metrics:,
|
|
93
93
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
94
|
-
# contain at least
|
|
94
|
+
# contain at least a `user_prompt` or `system_prompt` field. For
|
|
95
95
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
96
96
|
model_input:,
|
|
97
97
|
# Output generated by the LLM to be evaluated.
|