deeprails 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/lib/deeprails/client.rb +0 -4
- data/lib/deeprails/models/monitor_detail_response.rb +231 -3
- data/lib/deeprails/models.rb +0 -6
- data/lib/deeprails/version.rb +1 -1
- data/lib/deeprails.rb +0 -4
- data/rbi/deeprails/client.rbi +0 -3
- data/rbi/deeprails/models/monitor_detail_response.rbi +483 -4
- data/rbi/deeprails/models.rbi +0 -6
- data/sig/deeprails/client.rbs +0 -2
- data/sig/deeprails/models/monitor_detail_response.rbs +207 -6
- data/sig/deeprails/models.rbs +0 -6
- metadata +2 -14
- data/lib/deeprails/models/evaluate_create_params.rb +0 -134
- data/lib/deeprails/models/evaluate_retrieve_params.rb +0 -14
- data/lib/deeprails/models/evaluation.rb +0 -233
- data/lib/deeprails/resources/evaluate.rb +0 -70
- data/rbi/deeprails/models/evaluate_create_params.rbi +0 -280
- data/rbi/deeprails/models/evaluate_retrieve_params.rbi +0 -27
- data/rbi/deeprails/models/evaluation.rbi +0 -402
- data/rbi/deeprails/resources/evaluate.rbi +0 -66
- data/sig/deeprails/models/evaluate_create_params.rbs +0 -122
- data/sig/deeprails/models/evaluate_retrieve_params.rbs +0 -15
- data/sig/deeprails/models/evaluation.rbs +0 -204
- data/sig/deeprails/resources/evaluate.rbs +0 -22
|
@@ -1,402 +0,0 @@
|
|
|
1
|
-
# typed: strong
|
|
2
|
-
|
|
3
|
-
module Deeprails
|
|
4
|
-
module Models
|
|
5
|
-
class Evaluation < Deeprails::Internal::Type::BaseModel
|
|
6
|
-
OrHash =
|
|
7
|
-
T.type_alias do
|
|
8
|
-
T.any(Deeprails::Evaluation, Deeprails::Internal::AnyHash)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
# A unique evaluation ID.
|
|
12
|
-
sig { returns(String) }
|
|
13
|
-
attr_accessor :eval_id
|
|
14
|
-
|
|
15
|
-
# Status of the evaluation.
|
|
16
|
-
sig { returns(Deeprails::Evaluation::EvaluationStatus::TaggedSymbol) }
|
|
17
|
-
attr_accessor :evaluation_status
|
|
18
|
-
|
|
19
|
-
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
20
|
-
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
21
|
-
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
22
|
-
sig { returns(Deeprails::Evaluation::ModelInput) }
|
|
23
|
-
attr_reader :model_input
|
|
24
|
-
|
|
25
|
-
sig do
|
|
26
|
-
params(model_input: Deeprails::Evaluation::ModelInput::OrHash).void
|
|
27
|
-
end
|
|
28
|
-
attr_writer :model_input
|
|
29
|
-
|
|
30
|
-
# Output generated by the LLM to be evaluated.
|
|
31
|
-
sig { returns(String) }
|
|
32
|
-
attr_accessor :model_output
|
|
33
|
-
|
|
34
|
-
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
35
|
-
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
36
|
-
sig { returns(Deeprails::Evaluation::RunMode::TaggedSymbol) }
|
|
37
|
-
attr_accessor :run_mode
|
|
38
|
-
|
|
39
|
-
# The time the evaluation was created in UTC.
|
|
40
|
-
sig { returns(T.nilable(Time)) }
|
|
41
|
-
attr_reader :created_at
|
|
42
|
-
|
|
43
|
-
sig { params(created_at: Time).void }
|
|
44
|
-
attr_writer :created_at
|
|
45
|
-
|
|
46
|
-
# The time the evaluation completed in UTC.
|
|
47
|
-
sig { returns(T.nilable(Time)) }
|
|
48
|
-
attr_reader :end_timestamp
|
|
49
|
-
|
|
50
|
-
sig { params(end_timestamp: Time).void }
|
|
51
|
-
attr_writer :end_timestamp
|
|
52
|
-
|
|
53
|
-
# Description of the error causing the evaluation to fail, if any.
|
|
54
|
-
sig { returns(T.nilable(String)) }
|
|
55
|
-
attr_reader :error_message
|
|
56
|
-
|
|
57
|
-
sig { params(error_message: String).void }
|
|
58
|
-
attr_writer :error_message
|
|
59
|
-
|
|
60
|
-
# The time the error causing the evaluation to fail was recorded.
|
|
61
|
-
sig { returns(T.nilable(Time)) }
|
|
62
|
-
attr_reader :error_timestamp
|
|
63
|
-
|
|
64
|
-
sig { params(error_timestamp: Time).void }
|
|
65
|
-
attr_writer :error_timestamp
|
|
66
|
-
|
|
67
|
-
# Evaluation result consisting of average scores and rationales for each of the
|
|
68
|
-
# evaluated guardrail metrics.
|
|
69
|
-
sig { returns(T.nilable(T::Hash[Symbol, T.anything])) }
|
|
70
|
-
attr_reader :evaluation_result
|
|
71
|
-
|
|
72
|
-
sig { params(evaluation_result: T::Hash[Symbol, T.anything]).void }
|
|
73
|
-
attr_writer :evaluation_result
|
|
74
|
-
|
|
75
|
-
# Total cost of the evaluation.
|
|
76
|
-
sig { returns(T.nilable(Float)) }
|
|
77
|
-
attr_reader :evaluation_total_cost
|
|
78
|
-
|
|
79
|
-
sig { params(evaluation_total_cost: Float).void }
|
|
80
|
-
attr_writer :evaluation_total_cost
|
|
81
|
-
|
|
82
|
-
# An array of guardrail metrics that the model input and output pair will be
|
|
83
|
-
# evaluated on.
|
|
84
|
-
sig do
|
|
85
|
-
returns(
|
|
86
|
-
T.nilable(
|
|
87
|
-
T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol]
|
|
88
|
-
)
|
|
89
|
-
)
|
|
90
|
-
end
|
|
91
|
-
attr_reader :guardrail_metrics
|
|
92
|
-
|
|
93
|
-
sig do
|
|
94
|
-
params(
|
|
95
|
-
guardrail_metrics:
|
|
96
|
-
T::Array[Deeprails::Evaluation::GuardrailMetric::OrSymbol]
|
|
97
|
-
).void
|
|
98
|
-
end
|
|
99
|
-
attr_writer :guardrail_metrics
|
|
100
|
-
|
|
101
|
-
# Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
102
|
-
sig { returns(T.nilable(String)) }
|
|
103
|
-
attr_reader :model_used
|
|
104
|
-
|
|
105
|
-
sig { params(model_used: String).void }
|
|
106
|
-
attr_writer :model_used
|
|
107
|
-
|
|
108
|
-
# The most recent time the evaluation was modified in UTC.
|
|
109
|
-
sig { returns(T.nilable(Time)) }
|
|
110
|
-
attr_reader :modified_at
|
|
111
|
-
|
|
112
|
-
sig { params(modified_at: Time).void }
|
|
113
|
-
attr_writer :modified_at
|
|
114
|
-
|
|
115
|
-
# An optional, user-defined tag for the evaluation.
|
|
116
|
-
sig { returns(T.nilable(String)) }
|
|
117
|
-
attr_reader :nametag
|
|
118
|
-
|
|
119
|
-
sig { params(nametag: String).void }
|
|
120
|
-
attr_writer :nametag
|
|
121
|
-
|
|
122
|
-
# Evaluation progress. Values range between 0 and 100; 100 corresponds to a
|
|
123
|
-
# completed `evaluation_status`.
|
|
124
|
-
sig { returns(T.nilable(Integer)) }
|
|
125
|
-
attr_reader :progress
|
|
126
|
-
|
|
127
|
-
sig { params(progress: Integer).void }
|
|
128
|
-
attr_writer :progress
|
|
129
|
-
|
|
130
|
-
# The time the evaluation started in UTC.
|
|
131
|
-
sig { returns(T.nilable(Time)) }
|
|
132
|
-
attr_reader :start_timestamp
|
|
133
|
-
|
|
134
|
-
sig { params(start_timestamp: Time).void }
|
|
135
|
-
attr_writer :start_timestamp
|
|
136
|
-
|
|
137
|
-
sig do
|
|
138
|
-
params(
|
|
139
|
-
eval_id: String,
|
|
140
|
-
evaluation_status: Deeprails::Evaluation::EvaluationStatus::OrSymbol,
|
|
141
|
-
model_input: Deeprails::Evaluation::ModelInput::OrHash,
|
|
142
|
-
model_output: String,
|
|
143
|
-
run_mode: Deeprails::Evaluation::RunMode::OrSymbol,
|
|
144
|
-
created_at: Time,
|
|
145
|
-
end_timestamp: Time,
|
|
146
|
-
error_message: String,
|
|
147
|
-
error_timestamp: Time,
|
|
148
|
-
evaluation_result: T::Hash[Symbol, T.anything],
|
|
149
|
-
evaluation_total_cost: Float,
|
|
150
|
-
guardrail_metrics:
|
|
151
|
-
T::Array[Deeprails::Evaluation::GuardrailMetric::OrSymbol],
|
|
152
|
-
model_used: String,
|
|
153
|
-
modified_at: Time,
|
|
154
|
-
nametag: String,
|
|
155
|
-
progress: Integer,
|
|
156
|
-
start_timestamp: Time
|
|
157
|
-
).returns(T.attached_class)
|
|
158
|
-
end
|
|
159
|
-
def self.new(
|
|
160
|
-
# A unique evaluation ID.
|
|
161
|
-
eval_id:,
|
|
162
|
-
# Status of the evaluation.
|
|
163
|
-
evaluation_status:,
|
|
164
|
-
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
165
|
-
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
166
|
-
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
167
|
-
model_input:,
|
|
168
|
-
# Output generated by the LLM to be evaluated.
|
|
169
|
-
model_output:,
|
|
170
|
-
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
171
|
-
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
172
|
-
run_mode:,
|
|
173
|
-
# The time the evaluation was created in UTC.
|
|
174
|
-
created_at: nil,
|
|
175
|
-
# The time the evaluation completed in UTC.
|
|
176
|
-
end_timestamp: nil,
|
|
177
|
-
# Description of the error causing the evaluation to fail, if any.
|
|
178
|
-
error_message: nil,
|
|
179
|
-
# The time the error causing the evaluation to fail was recorded.
|
|
180
|
-
error_timestamp: nil,
|
|
181
|
-
# Evaluation result consisting of average scores and rationales for each of the
|
|
182
|
-
# evaluated guardrail metrics.
|
|
183
|
-
evaluation_result: nil,
|
|
184
|
-
# Total cost of the evaluation.
|
|
185
|
-
evaluation_total_cost: nil,
|
|
186
|
-
# An array of guardrail metrics that the model input and output pair will be
|
|
187
|
-
# evaluated on.
|
|
188
|
-
guardrail_metrics: nil,
|
|
189
|
-
# Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
190
|
-
model_used: nil,
|
|
191
|
-
# The most recent time the evaluation was modified in UTC.
|
|
192
|
-
modified_at: nil,
|
|
193
|
-
# An optional, user-defined tag for the evaluation.
|
|
194
|
-
nametag: nil,
|
|
195
|
-
# Evaluation progress. Values range between 0 and 100; 100 corresponds to a
|
|
196
|
-
# completed `evaluation_status`.
|
|
197
|
-
progress: nil,
|
|
198
|
-
# The time the evaluation started in UTC.
|
|
199
|
-
start_timestamp: nil
|
|
200
|
-
)
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
sig do
|
|
204
|
-
override.returns(
|
|
205
|
-
{
|
|
206
|
-
eval_id: String,
|
|
207
|
-
evaluation_status:
|
|
208
|
-
Deeprails::Evaluation::EvaluationStatus::TaggedSymbol,
|
|
209
|
-
model_input: Deeprails::Evaluation::ModelInput,
|
|
210
|
-
model_output: String,
|
|
211
|
-
run_mode: Deeprails::Evaluation::RunMode::TaggedSymbol,
|
|
212
|
-
created_at: Time,
|
|
213
|
-
end_timestamp: Time,
|
|
214
|
-
error_message: String,
|
|
215
|
-
error_timestamp: Time,
|
|
216
|
-
evaluation_result: T::Hash[Symbol, T.anything],
|
|
217
|
-
evaluation_total_cost: Float,
|
|
218
|
-
guardrail_metrics:
|
|
219
|
-
T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol],
|
|
220
|
-
model_used: String,
|
|
221
|
-
modified_at: Time,
|
|
222
|
-
nametag: String,
|
|
223
|
-
progress: Integer,
|
|
224
|
-
start_timestamp: Time
|
|
225
|
-
}
|
|
226
|
-
)
|
|
227
|
-
end
|
|
228
|
-
def to_hash
|
|
229
|
-
end
|
|
230
|
-
|
|
231
|
-
# Status of the evaluation.
|
|
232
|
-
module EvaluationStatus
|
|
233
|
-
extend Deeprails::Internal::Type::Enum
|
|
234
|
-
|
|
235
|
-
TaggedSymbol =
|
|
236
|
-
T.type_alias do
|
|
237
|
-
T.all(Symbol, Deeprails::Evaluation::EvaluationStatus)
|
|
238
|
-
end
|
|
239
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
240
|
-
|
|
241
|
-
IN_PROGRESS =
|
|
242
|
-
T.let(
|
|
243
|
-
:in_progress,
|
|
244
|
-
Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
|
|
245
|
-
)
|
|
246
|
-
COMPLETED =
|
|
247
|
-
T.let(
|
|
248
|
-
:completed,
|
|
249
|
-
Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
|
|
250
|
-
)
|
|
251
|
-
CANCELED =
|
|
252
|
-
T.let(
|
|
253
|
-
:canceled,
|
|
254
|
-
Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
|
|
255
|
-
)
|
|
256
|
-
QUEUED =
|
|
257
|
-
T.let(:queued, Deeprails::Evaluation::EvaluationStatus::TaggedSymbol)
|
|
258
|
-
FAILED =
|
|
259
|
-
T.let(:failed, Deeprails::Evaluation::EvaluationStatus::TaggedSymbol)
|
|
260
|
-
|
|
261
|
-
sig do
|
|
262
|
-
override.returns(
|
|
263
|
-
T::Array[Deeprails::Evaluation::EvaluationStatus::TaggedSymbol]
|
|
264
|
-
)
|
|
265
|
-
end
|
|
266
|
-
def self.values
|
|
267
|
-
end
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
class ModelInput < Deeprails::Internal::Type::BaseModel
|
|
271
|
-
OrHash =
|
|
272
|
-
T.type_alias do
|
|
273
|
-
T.any(
|
|
274
|
-
Deeprails::Evaluation::ModelInput,
|
|
275
|
-
Deeprails::Internal::AnyHash
|
|
276
|
-
)
|
|
277
|
-
end
|
|
278
|
-
|
|
279
|
-
# The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
280
|
-
sig { returns(T.nilable(String)) }
|
|
281
|
-
attr_reader :ground_truth
|
|
282
|
-
|
|
283
|
-
sig { params(ground_truth: String).void }
|
|
284
|
-
attr_writer :ground_truth
|
|
285
|
-
|
|
286
|
-
# The system prompt used to generate the output.
|
|
287
|
-
sig { returns(T.nilable(String)) }
|
|
288
|
-
attr_reader :system_prompt
|
|
289
|
-
|
|
290
|
-
sig { params(system_prompt: String).void }
|
|
291
|
-
attr_writer :system_prompt
|
|
292
|
-
|
|
293
|
-
# The user prompt used to generate the output.
|
|
294
|
-
sig { returns(T.nilable(String)) }
|
|
295
|
-
attr_reader :user_prompt
|
|
296
|
-
|
|
297
|
-
sig { params(user_prompt: String).void }
|
|
298
|
-
attr_writer :user_prompt
|
|
299
|
-
|
|
300
|
-
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
301
|
-
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
302
|
-
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
303
|
-
sig do
|
|
304
|
-
params(
|
|
305
|
-
ground_truth: String,
|
|
306
|
-
system_prompt: String,
|
|
307
|
-
user_prompt: String
|
|
308
|
-
).returns(T.attached_class)
|
|
309
|
-
end
|
|
310
|
-
def self.new(
|
|
311
|
-
# The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
312
|
-
ground_truth: nil,
|
|
313
|
-
# The system prompt used to generate the output.
|
|
314
|
-
system_prompt: nil,
|
|
315
|
-
# The user prompt used to generate the output.
|
|
316
|
-
user_prompt: nil
|
|
317
|
-
)
|
|
318
|
-
end
|
|
319
|
-
|
|
320
|
-
sig do
|
|
321
|
-
override.returns(
|
|
322
|
-
{ ground_truth: String, system_prompt: String, user_prompt: String }
|
|
323
|
-
)
|
|
324
|
-
end
|
|
325
|
-
def to_hash
|
|
326
|
-
end
|
|
327
|
-
end
|
|
328
|
-
|
|
329
|
-
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
330
|
-
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
331
|
-
module RunMode
|
|
332
|
-
extend Deeprails::Internal::Type::Enum
|
|
333
|
-
|
|
334
|
-
TaggedSymbol =
|
|
335
|
-
T.type_alias { T.all(Symbol, Deeprails::Evaluation::RunMode) }
|
|
336
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
337
|
-
|
|
338
|
-
PRECISION_PLUS =
|
|
339
|
-
T.let(:precision_plus, Deeprails::Evaluation::RunMode::TaggedSymbol)
|
|
340
|
-
PRECISION =
|
|
341
|
-
T.let(:precision, Deeprails::Evaluation::RunMode::TaggedSymbol)
|
|
342
|
-
SMART = T.let(:smart, Deeprails::Evaluation::RunMode::TaggedSymbol)
|
|
343
|
-
ECONOMY = T.let(:economy, Deeprails::Evaluation::RunMode::TaggedSymbol)
|
|
344
|
-
|
|
345
|
-
sig do
|
|
346
|
-
override.returns(
|
|
347
|
-
T::Array[Deeprails::Evaluation::RunMode::TaggedSymbol]
|
|
348
|
-
)
|
|
349
|
-
end
|
|
350
|
-
def self.values
|
|
351
|
-
end
|
|
352
|
-
end
|
|
353
|
-
|
|
354
|
-
module GuardrailMetric
|
|
355
|
-
extend Deeprails::Internal::Type::Enum
|
|
356
|
-
|
|
357
|
-
TaggedSymbol =
|
|
358
|
-
T.type_alias { T.all(Symbol, Deeprails::Evaluation::GuardrailMetric) }
|
|
359
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
360
|
-
|
|
361
|
-
CORRECTNESS =
|
|
362
|
-
T.let(
|
|
363
|
-
:correctness,
|
|
364
|
-
Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
|
|
365
|
-
)
|
|
366
|
-
COMPLETENESS =
|
|
367
|
-
T.let(
|
|
368
|
-
:completeness,
|
|
369
|
-
Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
|
|
370
|
-
)
|
|
371
|
-
INSTRUCTION_ADHERENCE =
|
|
372
|
-
T.let(
|
|
373
|
-
:instruction_adherence,
|
|
374
|
-
Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
|
|
375
|
-
)
|
|
376
|
-
CONTEXT_ADHERENCE =
|
|
377
|
-
T.let(
|
|
378
|
-
:context_adherence,
|
|
379
|
-
Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
|
|
380
|
-
)
|
|
381
|
-
GROUND_TRUTH_ADHERENCE =
|
|
382
|
-
T.let(
|
|
383
|
-
:ground_truth_adherence,
|
|
384
|
-
Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
|
|
385
|
-
)
|
|
386
|
-
COMPREHENSIVE_SAFETY =
|
|
387
|
-
T.let(
|
|
388
|
-
:comprehensive_safety,
|
|
389
|
-
Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
|
|
390
|
-
)
|
|
391
|
-
|
|
392
|
-
sig do
|
|
393
|
-
override.returns(
|
|
394
|
-
T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol]
|
|
395
|
-
)
|
|
396
|
-
end
|
|
397
|
-
def self.values
|
|
398
|
-
end
|
|
399
|
-
end
|
|
400
|
-
end
|
|
401
|
-
end
|
|
402
|
-
end
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
# typed: strong
|
|
2
|
-
|
|
3
|
-
module Deeprails
|
|
4
|
-
module Resources
|
|
5
|
-
class Evaluate
|
|
6
|
-
# Use this endpoint to evaluate a model's input and output pair against selected
|
|
7
|
-
# guardrail metrics
|
|
8
|
-
sig do
|
|
9
|
-
params(
|
|
10
|
-
model_input: Deeprails::EvaluateCreateParams::ModelInput::OrHash,
|
|
11
|
-
model_output: String,
|
|
12
|
-
run_mode: Deeprails::EvaluateCreateParams::RunMode::OrSymbol,
|
|
13
|
-
guardrail_metrics:
|
|
14
|
-
T::Array[
|
|
15
|
-
Deeprails::EvaluateCreateParams::GuardrailMetric::OrSymbol
|
|
16
|
-
],
|
|
17
|
-
model_used: String,
|
|
18
|
-
nametag: String,
|
|
19
|
-
request_options: Deeprails::RequestOptions::OrHash
|
|
20
|
-
).returns(Deeprails::Evaluation)
|
|
21
|
-
end
|
|
22
|
-
def create(
|
|
23
|
-
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
24
|
-
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
25
|
-
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
26
|
-
model_input:,
|
|
27
|
-
# Output generated by the LLM to be evaluated.
|
|
28
|
-
model_output:,
|
|
29
|
-
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
30
|
-
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
31
|
-
# Available run modes include `precision_plus`, `precision`, `smart`, and
|
|
32
|
-
# `economy`. Defaults to `smart`.
|
|
33
|
-
run_mode:,
|
|
34
|
-
# An array of guardrail metrics that the model input and output pair will be
|
|
35
|
-
# evaluated on. For non-enterprise users, these will be limited to the allowed
|
|
36
|
-
# guardrail metrics.
|
|
37
|
-
guardrail_metrics: nil,
|
|
38
|
-
# Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
39
|
-
model_used: nil,
|
|
40
|
-
# An optional, user-defined tag for the evaluation.
|
|
41
|
-
nametag: nil,
|
|
42
|
-
request_options: {}
|
|
43
|
-
)
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# Use this endpoint to retrieve the evaluation record for a given evaluation ID
|
|
47
|
-
sig do
|
|
48
|
-
params(
|
|
49
|
-
eval_id: String,
|
|
50
|
-
request_options: Deeprails::RequestOptions::OrHash
|
|
51
|
-
).returns(Deeprails::Evaluation)
|
|
52
|
-
end
|
|
53
|
-
def retrieve(
|
|
54
|
-
# The ID of the evaluation to retrieve.
|
|
55
|
-
eval_id,
|
|
56
|
-
request_options: {}
|
|
57
|
-
)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# @api private
|
|
61
|
-
sig { params(client: Deeprails::Client).returns(T.attached_class) }
|
|
62
|
-
def self.new(client:)
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
end
|
|
66
|
-
end
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
module Deeprails
|
|
2
|
-
module Models
|
|
3
|
-
type evaluate_create_params =
|
|
4
|
-
{
|
|
5
|
-
model_input: Deeprails::EvaluateCreateParams::ModelInput,
|
|
6
|
-
model_output: String,
|
|
7
|
-
run_mode: Deeprails::Models::EvaluateCreateParams::run_mode,
|
|
8
|
-
guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric],
|
|
9
|
-
model_used: String,
|
|
10
|
-
nametag: String
|
|
11
|
-
}
|
|
12
|
-
& Deeprails::Internal::Type::request_parameters
|
|
13
|
-
|
|
14
|
-
class EvaluateCreateParams < Deeprails::Internal::Type::BaseModel
|
|
15
|
-
extend Deeprails::Internal::Type::RequestParameters::Converter
|
|
16
|
-
include Deeprails::Internal::Type::RequestParameters
|
|
17
|
-
|
|
18
|
-
attr_accessor model_input: Deeprails::EvaluateCreateParams::ModelInput
|
|
19
|
-
|
|
20
|
-
attr_accessor model_output: String
|
|
21
|
-
|
|
22
|
-
attr_accessor run_mode: Deeprails::Models::EvaluateCreateParams::run_mode
|
|
23
|
-
|
|
24
|
-
attr_reader guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]?
|
|
25
|
-
|
|
26
|
-
def guardrail_metrics=: (
|
|
27
|
-
::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]
|
|
28
|
-
) -> ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]
|
|
29
|
-
|
|
30
|
-
attr_reader model_used: String?
|
|
31
|
-
|
|
32
|
-
def model_used=: (String) -> String
|
|
33
|
-
|
|
34
|
-
attr_reader nametag: String?
|
|
35
|
-
|
|
36
|
-
def nametag=: (String) -> String
|
|
37
|
-
|
|
38
|
-
def initialize: (
|
|
39
|
-
model_input: Deeprails::EvaluateCreateParams::ModelInput,
|
|
40
|
-
model_output: String,
|
|
41
|
-
run_mode: Deeprails::Models::EvaluateCreateParams::run_mode,
|
|
42
|
-
?guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric],
|
|
43
|
-
?model_used: String,
|
|
44
|
-
?nametag: String,
|
|
45
|
-
?request_options: Deeprails::request_opts
|
|
46
|
-
) -> void
|
|
47
|
-
|
|
48
|
-
def to_hash: -> {
|
|
49
|
-
model_input: Deeprails::EvaluateCreateParams::ModelInput,
|
|
50
|
-
model_output: String,
|
|
51
|
-
run_mode: Deeprails::Models::EvaluateCreateParams::run_mode,
|
|
52
|
-
guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric],
|
|
53
|
-
model_used: String,
|
|
54
|
-
nametag: String,
|
|
55
|
-
request_options: Deeprails::RequestOptions
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
type model_input =
|
|
59
|
-
{ ground_truth: String, system_prompt: String, user_prompt: String }
|
|
60
|
-
|
|
61
|
-
class ModelInput < Deeprails::Internal::Type::BaseModel
|
|
62
|
-
attr_reader ground_truth: String?
|
|
63
|
-
|
|
64
|
-
def ground_truth=: (String) -> String
|
|
65
|
-
|
|
66
|
-
attr_reader system_prompt: String?
|
|
67
|
-
|
|
68
|
-
def system_prompt=: (String) -> String
|
|
69
|
-
|
|
70
|
-
attr_reader user_prompt: String?
|
|
71
|
-
|
|
72
|
-
def user_prompt=: (String) -> String
|
|
73
|
-
|
|
74
|
-
def initialize: (
|
|
75
|
-
?ground_truth: String,
|
|
76
|
-
?system_prompt: String,
|
|
77
|
-
?user_prompt: String
|
|
78
|
-
) -> void
|
|
79
|
-
|
|
80
|
-
def to_hash: -> {
|
|
81
|
-
ground_truth: String,
|
|
82
|
-
system_prompt: String,
|
|
83
|
-
user_prompt: String
|
|
84
|
-
}
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
type run_mode = :precision_plus | :precision | :smart | :economy
|
|
88
|
-
|
|
89
|
-
module RunMode
|
|
90
|
-
extend Deeprails::Internal::Type::Enum
|
|
91
|
-
|
|
92
|
-
PRECISION_PLUS: :precision_plus
|
|
93
|
-
PRECISION: :precision
|
|
94
|
-
SMART: :smart
|
|
95
|
-
ECONOMY: :economy
|
|
96
|
-
|
|
97
|
-
def self?.values: -> ::Array[Deeprails::Models::EvaluateCreateParams::run_mode]
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
type guardrail_metric =
|
|
101
|
-
:correctness
|
|
102
|
-
| :completeness
|
|
103
|
-
| :instruction_adherence
|
|
104
|
-
| :context_adherence
|
|
105
|
-
| :ground_truth_adherence
|
|
106
|
-
| :comprehensive_safety
|
|
107
|
-
|
|
108
|
-
module GuardrailMetric
|
|
109
|
-
extend Deeprails::Internal::Type::Enum
|
|
110
|
-
|
|
111
|
-
CORRECTNESS: :correctness
|
|
112
|
-
COMPLETENESS: :completeness
|
|
113
|
-
INSTRUCTION_ADHERENCE: :instruction_adherence
|
|
114
|
-
CONTEXT_ADHERENCE: :context_adherence
|
|
115
|
-
GROUND_TRUTH_ADHERENCE: :ground_truth_adherence
|
|
116
|
-
COMPREHENSIVE_SAFETY: :comprehensive_safety
|
|
117
|
-
|
|
118
|
-
def self?.values: -> ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
end
|
|
122
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
module Deeprails
|
|
2
|
-
module Models
|
|
3
|
-
type evaluate_retrieve_params =
|
|
4
|
-
{ } & Deeprails::Internal::Type::request_parameters
|
|
5
|
-
|
|
6
|
-
class EvaluateRetrieveParams < Deeprails::Internal::Type::BaseModel
|
|
7
|
-
extend Deeprails::Internal::Type::RequestParameters::Converter
|
|
8
|
-
include Deeprails::Internal::Type::RequestParameters
|
|
9
|
-
|
|
10
|
-
def initialize: (?request_options: Deeprails::request_opts) -> void
|
|
11
|
-
|
|
12
|
-
def to_hash: -> { request_options: Deeprails::RequestOptions }
|
|
13
|
-
end
|
|
14
|
-
end
|
|
15
|
-
end
|