deeprails 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/lib/deeprails/client.rb +0 -4
- data/lib/deeprails/models/monitor_detail_response.rb +231 -3
- data/lib/deeprails/models.rb +0 -6
- data/lib/deeprails/version.rb +1 -1
- data/lib/deeprails.rb +0 -4
- data/rbi/deeprails/client.rbi +0 -3
- data/rbi/deeprails/models/monitor_detail_response.rbi +483 -4
- data/rbi/deeprails/models.rbi +0 -6
- data/sig/deeprails/client.rbs +0 -2
- data/sig/deeprails/models/monitor_detail_response.rbs +207 -6
- data/sig/deeprails/models.rbs +0 -6
- metadata +2 -14
- data/lib/deeprails/models/evaluate_create_params.rb +0 -134
- data/lib/deeprails/models/evaluate_retrieve_params.rb +0 -14
- data/lib/deeprails/models/evaluation.rb +0 -233
- data/lib/deeprails/resources/evaluate.rb +0 -70
- data/rbi/deeprails/models/evaluate_create_params.rbi +0 -280
- data/rbi/deeprails/models/evaluate_retrieve_params.rbi +0 -27
- data/rbi/deeprails/models/evaluation.rbi +0 -402
- data/rbi/deeprails/resources/evaluate.rbi +0 -66
- data/sig/deeprails/models/evaluate_create_params.rbs +0 -122
- data/sig/deeprails/models/evaluate_retrieve_params.rbs +0 -15
- data/sig/deeprails/models/evaluation.rbs +0 -204
- data/sig/deeprails/resources/evaluate.rbs +0 -22
|
@@ -39,10 +39,19 @@ module Deeprails
|
|
|
39
39
|
|
|
40
40
|
# An array of all evaluations performed by this monitor. Each one corresponds to a
|
|
41
41
|
# separate monitor event.
|
|
42
|
-
sig
|
|
42
|
+
sig do
|
|
43
|
+
returns(
|
|
44
|
+
T.nilable(T::Array[Deeprails::MonitorDetailResponse::Evaluation])
|
|
45
|
+
)
|
|
46
|
+
end
|
|
43
47
|
attr_reader :evaluations
|
|
44
48
|
|
|
45
|
-
sig
|
|
49
|
+
sig do
|
|
50
|
+
params(
|
|
51
|
+
evaluations:
|
|
52
|
+
T::Array[Deeprails::MonitorDetailResponse::Evaluation::OrHash]
|
|
53
|
+
).void
|
|
54
|
+
end
|
|
46
55
|
attr_writer :evaluations
|
|
47
56
|
|
|
48
57
|
# Contains five fields used for stats of this monitor: total evaluations,
|
|
@@ -78,7 +87,8 @@ module Deeprails
|
|
|
78
87
|
name: String,
|
|
79
88
|
created_at: Time,
|
|
80
89
|
description: String,
|
|
81
|
-
evaluations:
|
|
90
|
+
evaluations:
|
|
91
|
+
T::Array[Deeprails::MonitorDetailResponse::Evaluation::OrHash],
|
|
82
92
|
stats: Deeprails::MonitorDetailResponse::Stats::OrHash,
|
|
83
93
|
updated_at: Time,
|
|
84
94
|
user_id: String
|
|
@@ -119,7 +129,7 @@ module Deeprails
|
|
|
119
129
|
name: String,
|
|
120
130
|
created_at: Time,
|
|
121
131
|
description: String,
|
|
122
|
-
evaluations: T::Array[Deeprails::Evaluation],
|
|
132
|
+
evaluations: T::Array[Deeprails::MonitorDetailResponse::Evaluation],
|
|
123
133
|
stats: Deeprails::MonitorDetailResponse::Stats,
|
|
124
134
|
updated_at: Time,
|
|
125
135
|
user_id: String
|
|
@@ -162,6 +172,475 @@ module Deeprails
|
|
|
162
172
|
end
|
|
163
173
|
end
|
|
164
174
|
|
|
175
|
+
class Evaluation < Deeprails::Internal::Type::BaseModel
|
|
176
|
+
OrHash =
|
|
177
|
+
T.type_alias do
|
|
178
|
+
T.any(
|
|
179
|
+
Deeprails::MonitorDetailResponse::Evaluation,
|
|
180
|
+
Deeprails::Internal::AnyHash
|
|
181
|
+
)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# A unique evaluation ID.
|
|
185
|
+
sig { returns(String) }
|
|
186
|
+
attr_accessor :eval_id
|
|
187
|
+
|
|
188
|
+
# Status of the evaluation.
|
|
189
|
+
sig do
|
|
190
|
+
returns(
|
|
191
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
192
|
+
)
|
|
193
|
+
end
|
|
194
|
+
attr_accessor :evaluation_status
|
|
195
|
+
|
|
196
|
+
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
197
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
198
|
+
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
199
|
+
sig do
|
|
200
|
+
returns(Deeprails::MonitorDetailResponse::Evaluation::ModelInput)
|
|
201
|
+
end
|
|
202
|
+
attr_reader :model_input
|
|
203
|
+
|
|
204
|
+
sig do
|
|
205
|
+
params(
|
|
206
|
+
model_input:
|
|
207
|
+
Deeprails::MonitorDetailResponse::Evaluation::ModelInput::OrHash
|
|
208
|
+
).void
|
|
209
|
+
end
|
|
210
|
+
attr_writer :model_input
|
|
211
|
+
|
|
212
|
+
# Output generated by the LLM to be evaluated.
|
|
213
|
+
sig { returns(String) }
|
|
214
|
+
attr_accessor :model_output
|
|
215
|
+
|
|
216
|
+
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
217
|
+
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
218
|
+
sig do
|
|
219
|
+
returns(
|
|
220
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
|
|
221
|
+
)
|
|
222
|
+
end
|
|
223
|
+
attr_accessor :run_mode
|
|
224
|
+
|
|
225
|
+
# The time the evaluation was created in UTC.
|
|
226
|
+
sig { returns(T.nilable(Time)) }
|
|
227
|
+
attr_reader :created_at
|
|
228
|
+
|
|
229
|
+
sig { params(created_at: Time).void }
|
|
230
|
+
attr_writer :created_at
|
|
231
|
+
|
|
232
|
+
# The time the evaluation completed in UTC.
|
|
233
|
+
sig { returns(T.nilable(Time)) }
|
|
234
|
+
attr_reader :end_timestamp
|
|
235
|
+
|
|
236
|
+
sig { params(end_timestamp: Time).void }
|
|
237
|
+
attr_writer :end_timestamp
|
|
238
|
+
|
|
239
|
+
# Description of the error causing the evaluation to fail, if any.
|
|
240
|
+
sig { returns(T.nilable(String)) }
|
|
241
|
+
attr_reader :error_message
|
|
242
|
+
|
|
243
|
+
sig { params(error_message: String).void }
|
|
244
|
+
attr_writer :error_message
|
|
245
|
+
|
|
246
|
+
# The time the error causing the evaluation to fail was recorded.
|
|
247
|
+
sig { returns(T.nilable(Time)) }
|
|
248
|
+
attr_reader :error_timestamp
|
|
249
|
+
|
|
250
|
+
sig { params(error_timestamp: Time).void }
|
|
251
|
+
attr_writer :error_timestamp
|
|
252
|
+
|
|
253
|
+
# Evaluation result consisting of average scores and rationales for each of the
|
|
254
|
+
# evaluated guardrail metrics.
|
|
255
|
+
sig { returns(T.nilable(T::Hash[Symbol, T.anything])) }
|
|
256
|
+
attr_reader :evaluation_result
|
|
257
|
+
|
|
258
|
+
sig { params(evaluation_result: T::Hash[Symbol, T.anything]).void }
|
|
259
|
+
attr_writer :evaluation_result
|
|
260
|
+
|
|
261
|
+
# Total cost of the evaluation.
|
|
262
|
+
sig { returns(T.nilable(Float)) }
|
|
263
|
+
attr_reader :evaluation_total_cost
|
|
264
|
+
|
|
265
|
+
sig { params(evaluation_total_cost: Float).void }
|
|
266
|
+
attr_writer :evaluation_total_cost
|
|
267
|
+
|
|
268
|
+
# An array of guardrail metrics that the model input and output pair will be
|
|
269
|
+
# evaluated on.
|
|
270
|
+
sig do
|
|
271
|
+
returns(
|
|
272
|
+
T.nilable(
|
|
273
|
+
T::Array[
|
|
274
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
275
|
+
]
|
|
276
|
+
)
|
|
277
|
+
)
|
|
278
|
+
end
|
|
279
|
+
attr_reader :guardrail_metrics
|
|
280
|
+
|
|
281
|
+
sig do
|
|
282
|
+
params(
|
|
283
|
+
guardrail_metrics:
|
|
284
|
+
T::Array[
|
|
285
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::OrSymbol
|
|
286
|
+
]
|
|
287
|
+
).void
|
|
288
|
+
end
|
|
289
|
+
attr_writer :guardrail_metrics
|
|
290
|
+
|
|
291
|
+
# Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
292
|
+
sig { returns(T.nilable(String)) }
|
|
293
|
+
attr_reader :model_used
|
|
294
|
+
|
|
295
|
+
sig { params(model_used: String).void }
|
|
296
|
+
attr_writer :model_used
|
|
297
|
+
|
|
298
|
+
# The most recent time the evaluation was modified in UTC.
|
|
299
|
+
sig { returns(T.nilable(Time)) }
|
|
300
|
+
attr_reader :modified_at
|
|
301
|
+
|
|
302
|
+
sig { params(modified_at: Time).void }
|
|
303
|
+
attr_writer :modified_at
|
|
304
|
+
|
|
305
|
+
# An optional, user-defined tag for the evaluation.
|
|
306
|
+
sig { returns(T.nilable(String)) }
|
|
307
|
+
attr_reader :nametag
|
|
308
|
+
|
|
309
|
+
sig { params(nametag: String).void }
|
|
310
|
+
attr_writer :nametag
|
|
311
|
+
|
|
312
|
+
# Evaluation progress. Values range between 0 and 100; 100 corresponds to a
|
|
313
|
+
# completed `evaluation_status`.
|
|
314
|
+
sig { returns(T.nilable(Integer)) }
|
|
315
|
+
attr_reader :progress
|
|
316
|
+
|
|
317
|
+
sig { params(progress: Integer).void }
|
|
318
|
+
attr_writer :progress
|
|
319
|
+
|
|
320
|
+
# The time the evaluation started in UTC.
|
|
321
|
+
sig { returns(T.nilable(Time)) }
|
|
322
|
+
attr_reader :start_timestamp
|
|
323
|
+
|
|
324
|
+
sig { params(start_timestamp: Time).void }
|
|
325
|
+
attr_writer :start_timestamp
|
|
326
|
+
|
|
327
|
+
sig do
|
|
328
|
+
params(
|
|
329
|
+
eval_id: String,
|
|
330
|
+
evaluation_status:
|
|
331
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::OrSymbol,
|
|
332
|
+
model_input:
|
|
333
|
+
Deeprails::MonitorDetailResponse::Evaluation::ModelInput::OrHash,
|
|
334
|
+
model_output: String,
|
|
335
|
+
run_mode:
|
|
336
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::OrSymbol,
|
|
337
|
+
created_at: Time,
|
|
338
|
+
end_timestamp: Time,
|
|
339
|
+
error_message: String,
|
|
340
|
+
error_timestamp: Time,
|
|
341
|
+
evaluation_result: T::Hash[Symbol, T.anything],
|
|
342
|
+
evaluation_total_cost: Float,
|
|
343
|
+
guardrail_metrics:
|
|
344
|
+
T::Array[
|
|
345
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::OrSymbol
|
|
346
|
+
],
|
|
347
|
+
model_used: String,
|
|
348
|
+
modified_at: Time,
|
|
349
|
+
nametag: String,
|
|
350
|
+
progress: Integer,
|
|
351
|
+
start_timestamp: Time
|
|
352
|
+
).returns(T.attached_class)
|
|
353
|
+
end
|
|
354
|
+
def self.new(
|
|
355
|
+
# A unique evaluation ID.
|
|
356
|
+
eval_id:,
|
|
357
|
+
# Status of the evaluation.
|
|
358
|
+
evaluation_status:,
|
|
359
|
+
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
360
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
361
|
+
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
362
|
+
model_input:,
|
|
363
|
+
# Output generated by the LLM to be evaluated.
|
|
364
|
+
model_output:,
|
|
365
|
+
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
366
|
+
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
367
|
+
run_mode:,
|
|
368
|
+
# The time the evaluation was created in UTC.
|
|
369
|
+
created_at: nil,
|
|
370
|
+
# The time the evaluation completed in UTC.
|
|
371
|
+
end_timestamp: nil,
|
|
372
|
+
# Description of the error causing the evaluation to fail, if any.
|
|
373
|
+
error_message: nil,
|
|
374
|
+
# The time the error causing the evaluation to fail was recorded.
|
|
375
|
+
error_timestamp: nil,
|
|
376
|
+
# Evaluation result consisting of average scores and rationales for each of the
|
|
377
|
+
# evaluated guardrail metrics.
|
|
378
|
+
evaluation_result: nil,
|
|
379
|
+
# Total cost of the evaluation.
|
|
380
|
+
evaluation_total_cost: nil,
|
|
381
|
+
# An array of guardrail metrics that the model input and output pair will be
|
|
382
|
+
# evaluated on.
|
|
383
|
+
guardrail_metrics: nil,
|
|
384
|
+
# Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
385
|
+
model_used: nil,
|
|
386
|
+
# The most recent time the evaluation was modified in UTC.
|
|
387
|
+
modified_at: nil,
|
|
388
|
+
# An optional, user-defined tag for the evaluation.
|
|
389
|
+
nametag: nil,
|
|
390
|
+
# Evaluation progress. Values range between 0 and 100; 100 corresponds to a
|
|
391
|
+
# completed `evaluation_status`.
|
|
392
|
+
progress: nil,
|
|
393
|
+
# The time the evaluation started in UTC.
|
|
394
|
+
start_timestamp: nil
|
|
395
|
+
)
|
|
396
|
+
end
|
|
397
|
+
|
|
398
|
+
sig do
|
|
399
|
+
override.returns(
|
|
400
|
+
{
|
|
401
|
+
eval_id: String,
|
|
402
|
+
evaluation_status:
|
|
403
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol,
|
|
404
|
+
model_input:
|
|
405
|
+
Deeprails::MonitorDetailResponse::Evaluation::ModelInput,
|
|
406
|
+
model_output: String,
|
|
407
|
+
run_mode:
|
|
408
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol,
|
|
409
|
+
created_at: Time,
|
|
410
|
+
end_timestamp: Time,
|
|
411
|
+
error_message: String,
|
|
412
|
+
error_timestamp: Time,
|
|
413
|
+
evaluation_result: T::Hash[Symbol, T.anything],
|
|
414
|
+
evaluation_total_cost: Float,
|
|
415
|
+
guardrail_metrics:
|
|
416
|
+
T::Array[
|
|
417
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
418
|
+
],
|
|
419
|
+
model_used: String,
|
|
420
|
+
modified_at: Time,
|
|
421
|
+
nametag: String,
|
|
422
|
+
progress: Integer,
|
|
423
|
+
start_timestamp: Time
|
|
424
|
+
}
|
|
425
|
+
)
|
|
426
|
+
end
|
|
427
|
+
def to_hash
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
# Status of the evaluation.
|
|
431
|
+
module EvaluationStatus
|
|
432
|
+
extend Deeprails::Internal::Type::Enum
|
|
433
|
+
|
|
434
|
+
TaggedSymbol =
|
|
435
|
+
T.type_alias do
|
|
436
|
+
T.all(
|
|
437
|
+
Symbol,
|
|
438
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus
|
|
439
|
+
)
|
|
440
|
+
end
|
|
441
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
442
|
+
|
|
443
|
+
IN_PROGRESS =
|
|
444
|
+
T.let(
|
|
445
|
+
:in_progress,
|
|
446
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
447
|
+
)
|
|
448
|
+
COMPLETED =
|
|
449
|
+
T.let(
|
|
450
|
+
:completed,
|
|
451
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
452
|
+
)
|
|
453
|
+
CANCELED =
|
|
454
|
+
T.let(
|
|
455
|
+
:canceled,
|
|
456
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
457
|
+
)
|
|
458
|
+
QUEUED =
|
|
459
|
+
T.let(
|
|
460
|
+
:queued,
|
|
461
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
462
|
+
)
|
|
463
|
+
FAILED =
|
|
464
|
+
T.let(
|
|
465
|
+
:failed,
|
|
466
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
sig do
|
|
470
|
+
override.returns(
|
|
471
|
+
T::Array[
|
|
472
|
+
Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
|
|
473
|
+
]
|
|
474
|
+
)
|
|
475
|
+
end
|
|
476
|
+
def self.values
|
|
477
|
+
end
|
|
478
|
+
end
|
|
479
|
+
|
|
480
|
+
class ModelInput < Deeprails::Internal::Type::BaseModel
|
|
481
|
+
OrHash =
|
|
482
|
+
T.type_alias do
|
|
483
|
+
T.any(
|
|
484
|
+
Deeprails::MonitorDetailResponse::Evaluation::ModelInput,
|
|
485
|
+
Deeprails::Internal::AnyHash
|
|
486
|
+
)
|
|
487
|
+
end
|
|
488
|
+
|
|
489
|
+
# The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
490
|
+
sig { returns(T.nilable(String)) }
|
|
491
|
+
attr_reader :ground_truth
|
|
492
|
+
|
|
493
|
+
sig { params(ground_truth: String).void }
|
|
494
|
+
attr_writer :ground_truth
|
|
495
|
+
|
|
496
|
+
# The system prompt used to generate the output.
|
|
497
|
+
sig { returns(T.nilable(String)) }
|
|
498
|
+
attr_reader :system_prompt
|
|
499
|
+
|
|
500
|
+
sig { params(system_prompt: String).void }
|
|
501
|
+
attr_writer :system_prompt
|
|
502
|
+
|
|
503
|
+
# The user prompt used to generate the output.
|
|
504
|
+
sig { returns(T.nilable(String)) }
|
|
505
|
+
attr_reader :user_prompt
|
|
506
|
+
|
|
507
|
+
sig { params(user_prompt: String).void }
|
|
508
|
+
attr_writer :user_prompt
|
|
509
|
+
|
|
510
|
+
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
511
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
512
|
+
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
513
|
+
sig do
|
|
514
|
+
params(
|
|
515
|
+
ground_truth: String,
|
|
516
|
+
system_prompt: String,
|
|
517
|
+
user_prompt: String
|
|
518
|
+
).returns(T.attached_class)
|
|
519
|
+
end
|
|
520
|
+
def self.new(
|
|
521
|
+
# The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
522
|
+
ground_truth: nil,
|
|
523
|
+
# The system prompt used to generate the output.
|
|
524
|
+
system_prompt: nil,
|
|
525
|
+
# The user prompt used to generate the output.
|
|
526
|
+
user_prompt: nil
|
|
527
|
+
)
|
|
528
|
+
end
|
|
529
|
+
|
|
530
|
+
sig do
|
|
531
|
+
override.returns(
|
|
532
|
+
{
|
|
533
|
+
ground_truth: String,
|
|
534
|
+
system_prompt: String,
|
|
535
|
+
user_prompt: String
|
|
536
|
+
}
|
|
537
|
+
)
|
|
538
|
+
end
|
|
539
|
+
def to_hash
|
|
540
|
+
end
|
|
541
|
+
end
|
|
542
|
+
|
|
543
|
+
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
544
|
+
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
545
|
+
module RunMode
|
|
546
|
+
extend Deeprails::Internal::Type::Enum
|
|
547
|
+
|
|
548
|
+
TaggedSymbol =
|
|
549
|
+
T.type_alias do
|
|
550
|
+
T.all(
|
|
551
|
+
Symbol,
|
|
552
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode
|
|
553
|
+
)
|
|
554
|
+
end
|
|
555
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
556
|
+
|
|
557
|
+
PRECISION_PLUS =
|
|
558
|
+
T.let(
|
|
559
|
+
:precision_plus,
|
|
560
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
|
|
561
|
+
)
|
|
562
|
+
PRECISION =
|
|
563
|
+
T.let(
|
|
564
|
+
:precision,
|
|
565
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
|
|
566
|
+
)
|
|
567
|
+
SMART =
|
|
568
|
+
T.let(
|
|
569
|
+
:smart,
|
|
570
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
|
|
571
|
+
)
|
|
572
|
+
ECONOMY =
|
|
573
|
+
T.let(
|
|
574
|
+
:economy,
|
|
575
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
sig do
|
|
579
|
+
override.returns(
|
|
580
|
+
T::Array[
|
|
581
|
+
Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
|
|
582
|
+
]
|
|
583
|
+
)
|
|
584
|
+
end
|
|
585
|
+
def self.values
|
|
586
|
+
end
|
|
587
|
+
end
|
|
588
|
+
|
|
589
|
+
module GuardrailMetric
|
|
590
|
+
extend Deeprails::Internal::Type::Enum
|
|
591
|
+
|
|
592
|
+
TaggedSymbol =
|
|
593
|
+
T.type_alias do
|
|
594
|
+
T.all(
|
|
595
|
+
Symbol,
|
|
596
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric
|
|
597
|
+
)
|
|
598
|
+
end
|
|
599
|
+
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
600
|
+
|
|
601
|
+
CORRECTNESS =
|
|
602
|
+
T.let(
|
|
603
|
+
:correctness,
|
|
604
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
605
|
+
)
|
|
606
|
+
COMPLETENESS =
|
|
607
|
+
T.let(
|
|
608
|
+
:completeness,
|
|
609
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
610
|
+
)
|
|
611
|
+
INSTRUCTION_ADHERENCE =
|
|
612
|
+
T.let(
|
|
613
|
+
:instruction_adherence,
|
|
614
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
615
|
+
)
|
|
616
|
+
CONTEXT_ADHERENCE =
|
|
617
|
+
T.let(
|
|
618
|
+
:context_adherence,
|
|
619
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
620
|
+
)
|
|
621
|
+
GROUND_TRUTH_ADHERENCE =
|
|
622
|
+
T.let(
|
|
623
|
+
:ground_truth_adherence,
|
|
624
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
625
|
+
)
|
|
626
|
+
COMPREHENSIVE_SAFETY =
|
|
627
|
+
T.let(
|
|
628
|
+
:comprehensive_safety,
|
|
629
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
sig do
|
|
633
|
+
override.returns(
|
|
634
|
+
T::Array[
|
|
635
|
+
Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
|
|
636
|
+
]
|
|
637
|
+
)
|
|
638
|
+
end
|
|
639
|
+
def self.values
|
|
640
|
+
end
|
|
641
|
+
end
|
|
642
|
+
end
|
|
643
|
+
|
|
165
644
|
class Stats < Deeprails::Internal::Type::BaseModel
|
|
166
645
|
OrHash =
|
|
167
646
|
T.type_alias do
|
data/rbi/deeprails/models.rbi
CHANGED
|
@@ -13,12 +13,6 @@ module Deeprails
|
|
|
13
13
|
|
|
14
14
|
DefendUpdateWorkflowParams = Deeprails::Models::DefendUpdateWorkflowParams
|
|
15
15
|
|
|
16
|
-
EvaluateCreateParams = Deeprails::Models::EvaluateCreateParams
|
|
17
|
-
|
|
18
|
-
EvaluateRetrieveParams = Deeprails::Models::EvaluateRetrieveParams
|
|
19
|
-
|
|
20
|
-
Evaluation = Deeprails::Models::Evaluation
|
|
21
|
-
|
|
22
16
|
MonitorCreateParams = Deeprails::Models::MonitorCreateParams
|
|
23
17
|
|
|
24
18
|
MonitorDetailResponse = Deeprails::Models::MonitorDetailResponse
|