deeprails 0.8.0 → 0.9.0

This diff compares the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
@@ -39,10 +39,19 @@ module Deeprails
 
       # An array of all evaluations performed by this monitor. Each one corresponds to a
       # separate monitor event.
-      sig { returns(T.nilable(T::Array[Deeprails::Evaluation])) }
+      sig do
+        returns(
+          T.nilable(T::Array[Deeprails::MonitorDetailResponse::Evaluation])
+        )
+      end
       attr_reader :evaluations
 
-      sig { params(evaluations: T::Array[Deeprails::Evaluation::OrHash]).void }
+      sig do
+        params(
+          evaluations:
+            T::Array[Deeprails::MonitorDetailResponse::Evaluation::OrHash]
+        ).void
+      end
       attr_writer :evaluations
 
       # Contains five fields used for stats of this monitor: total evaluations,
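In 0.9.0 the `evaluations` accessor is typed against a new model nested under the response, `Deeprails::MonitorDetailResponse::Evaluation`, rather than the old top-level `Deeprails::Evaluation` (whose alias is removed later in this diff). A minimal reading sketch under the new signature; the client setup and the `retrieve` call shape are assumptions for illustration, only the types come from this diff:

    # Hypothetical usage; assumes a configured Deeprails::Client in `client`.
    detail = client.monitor.retrieve("monitor_id") # method shape assumed

    # evaluations is T.nilable(T::Array[Deeprails::MonitorDetailResponse::Evaluation])
    (detail.evaluations || []).each do |evaluation|
      puts "#{evaluation.eval_id}: #{evaluation.evaluation_status}"
    end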
@@ -78,7 +87,8 @@ module Deeprails
           name: String,
           created_at: Time,
           description: String,
-          evaluations: T::Array[Deeprails::Evaluation::OrHash],
+          evaluations:
+            T::Array[Deeprails::MonitorDetailResponse::Evaluation::OrHash],
           stats: Deeprails::MonitorDetailResponse::Stats::OrHash,
           updated_at: Time,
           user_id: String
@@ -119,7 +129,7 @@ module Deeprails
             name: String,
             created_at: Time,
             description: String,
-            evaluations: T::Array[Deeprails::Evaluation],
+            evaluations: T::Array[Deeprails::MonitorDetailResponse::Evaluation],
             stats: Deeprails::MonitorDetailResponse::Stats,
             updated_at: Time,
             user_id: String
@@ -162,6 +172,475 @@ module Deeprails
         end
       end
 
+      class Evaluation < Deeprails::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              Deeprails::MonitorDetailResponse::Evaluation,
+              Deeprails::Internal::AnyHash
+            )
+          end
+
+        # A unique evaluation ID.
+        sig { returns(String) }
+        attr_accessor :eval_id
+
+        # Status of the evaluation.
+        sig do
+          returns(
+            Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+          )
+        end
+        attr_accessor :evaluation_status
+
+        # A dictionary of inputs sent to the LLM to generate output. The dictionary must
+        # contain at least a `user_prompt` field or a `system_prompt` field. For
+        # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
+        sig do
+          returns(Deeprails::MonitorDetailResponse::Evaluation::ModelInput)
+        end
+        attr_reader :model_input
+
+        sig do
+          params(
+            model_input:
+              Deeprails::MonitorDetailResponse::Evaluation::ModelInput::OrHash
+          ).void
+        end
+        attr_writer :model_input
+
+        # Output generated by the LLM to be evaluated.
+        sig { returns(String) }
+        attr_accessor :model_output
+
+        # Run mode for the evaluation. The run mode allows the user to optimize for speed,
+        # accuracy, and cost by determining which models are used to evaluate the event.
+        sig do
+          returns(
+            Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
+          )
+        end
+        attr_accessor :run_mode
+
+        # The time the evaluation was created in UTC.
+        sig { returns(T.nilable(Time)) }
+        attr_reader :created_at
+
+        sig { params(created_at: Time).void }
+        attr_writer :created_at
+
+        # The time the evaluation completed in UTC.
+        sig { returns(T.nilable(Time)) }
+        attr_reader :end_timestamp
+
+        sig { params(end_timestamp: Time).void }
+        attr_writer :end_timestamp
+
+        # Description of the error causing the evaluation to fail, if any.
+        sig { returns(T.nilable(String)) }
+        attr_reader :error_message
+
+        sig { params(error_message: String).void }
+        attr_writer :error_message
+
+        # The time the error causing the evaluation to fail was recorded.
+        sig { returns(T.nilable(Time)) }
+        attr_reader :error_timestamp
+
+        sig { params(error_timestamp: Time).void }
+        attr_writer :error_timestamp
+
+        # Evaluation result consisting of average scores and rationales for each of the
+        # evaluated guardrail metrics.
+        sig { returns(T.nilable(T::Hash[Symbol, T.anything])) }
+        attr_reader :evaluation_result
+
+        sig { params(evaluation_result: T::Hash[Symbol, T.anything]).void }
+        attr_writer :evaluation_result
+
+        # Total cost of the evaluation.
+        sig { returns(T.nilable(Float)) }
+        attr_reader :evaluation_total_cost
+
+        sig { params(evaluation_total_cost: Float).void }
+        attr_writer :evaluation_total_cost
+
+        # An array of guardrail metrics that the model input and output pair will be
+        # evaluated on.
+        sig do
+          returns(
+            T.nilable(
+              T::Array[
+                Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+              ]
+            )
+          )
+        end
+        attr_reader :guardrail_metrics
+
+        sig do
+          params(
+            guardrail_metrics:
+              T::Array[
+                Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::OrSymbol
+              ]
+          ).void
+        end
+        attr_writer :guardrail_metrics
+
+        # Model ID used to generate the output, like `gpt-4o` or `o3`.
+        sig { returns(T.nilable(String)) }
+        attr_reader :model_used
+
+        sig { params(model_used: String).void }
+        attr_writer :model_used
+
+        # The most recent time the evaluation was modified in UTC.
+        sig { returns(T.nilable(Time)) }
+        attr_reader :modified_at
+
+        sig { params(modified_at: Time).void }
+        attr_writer :modified_at
+
+        # An optional, user-defined tag for the evaluation.
+        sig { returns(T.nilable(String)) }
+        attr_reader :nametag
+
+        sig { params(nametag: String).void }
+        attr_writer :nametag
+
+        # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
+        # completed `evaluation_status`.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :progress
+
+        sig { params(progress: Integer).void }
+        attr_writer :progress
+
+        # The time the evaluation started in UTC.
+        sig { returns(T.nilable(Time)) }
+        attr_reader :start_timestamp
+
+        sig { params(start_timestamp: Time).void }
+        attr_writer :start_timestamp
+
+        sig do
+          params(
+            eval_id: String,
+            evaluation_status:
+              Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::OrSymbol,
+            model_input:
+              Deeprails::MonitorDetailResponse::Evaluation::ModelInput::OrHash,
+            model_output: String,
+            run_mode:
+              Deeprails::MonitorDetailResponse::Evaluation::RunMode::OrSymbol,
+            created_at: Time,
+            end_timestamp: Time,
+            error_message: String,
+            error_timestamp: Time,
+            evaluation_result: T::Hash[Symbol, T.anything],
+            evaluation_total_cost: Float,
+            guardrail_metrics:
+              T::Array[
+                Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::OrSymbol
+              ],
+            model_used: String,
+            modified_at: Time,
+            nametag: String,
+            progress: Integer,
+            start_timestamp: Time
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # A unique evaluation ID.
+          eval_id:,
+          # Status of the evaluation.
+          evaluation_status:,
+          # A dictionary of inputs sent to the LLM to generate output. The dictionary must
+          # contain at least a `user_prompt` field or a `system_prompt` field. For
+          # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
+          model_input:,
+          # Output generated by the LLM to be evaluated.
+          model_output:,
+          # Run mode for the evaluation. The run mode allows the user to optimize for speed,
+          # accuracy, and cost by determining which models are used to evaluate the event.
+          run_mode:,
+          # The time the evaluation was created in UTC.
+          created_at: nil,
+          # The time the evaluation completed in UTC.
+          end_timestamp: nil,
+          # Description of the error causing the evaluation to fail, if any.
+          error_message: nil,
+          # The time the error causing the evaluation to fail was recorded.
+          error_timestamp: nil,
+          # Evaluation result consisting of average scores and rationales for each of the
+          # evaluated guardrail metrics.
+          evaluation_result: nil,
+          # Total cost of the evaluation.
+          evaluation_total_cost: nil,
+          # An array of guardrail metrics that the model input and output pair will be
+          # evaluated on.
+          guardrail_metrics: nil,
+          # Model ID used to generate the output, like `gpt-4o` or `o3`.
+          model_used: nil,
+          # The most recent time the evaluation was modified in UTC.
+          modified_at: nil,
+          # An optional, user-defined tag for the evaluation.
+          nametag: nil,
+          # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
+          # completed `evaluation_status`.
+          progress: nil,
+          # The time the evaluation started in UTC.
+          start_timestamp: nil
+        )
+        end
+
+        sig do
+          override.returns(
+            {
+              eval_id: String,
+              evaluation_status:
+                Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol,
+              model_input:
+                Deeprails::MonitorDetailResponse::Evaluation::ModelInput,
+              model_output: String,
+              run_mode:
+                Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol,
+              created_at: Time,
+              end_timestamp: Time,
+              error_message: String,
+              error_timestamp: Time,
+              evaluation_result: T::Hash[Symbol, T.anything],
+              evaluation_total_cost: Float,
+              guardrail_metrics:
+                T::Array[
+                  Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+                ],
+              model_used: String,
+              modified_at: Time,
+              nametag: String,
+              progress: Integer,
+              start_timestamp: Time
+            }
+          )
+        end
+        def to_hash
+        end
+
+        # Status of the evaluation.
+        module EvaluationStatus
+          extend Deeprails::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          IN_PROGRESS =
+            T.let(
+              :in_progress,
+              Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+            )
+          COMPLETED =
+            T.let(
+              :completed,
+              Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+            )
+          CANCELED =
+            T.let(
+              :canceled,
+              Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+            )
+          QUEUED =
+            T.let(
+              :queued,
+              Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+            )
+          FAILED =
+            T.let(
+              :failed,
+              Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+
+        class ModelInput < Deeprails::Internal::Type::BaseModel
+          OrHash =
+            T.type_alias do
+              T.any(
+                Deeprails::MonitorDetailResponse::Evaluation::ModelInput,
+                Deeprails::Internal::AnyHash
+              )
+            end
+
+          # The ground truth for evaluating Ground Truth Adherence guardrail.
+          sig { returns(T.nilable(String)) }
+          attr_reader :ground_truth
+
+          sig { params(ground_truth: String).void }
+          attr_writer :ground_truth
+
+          # The system prompt used to generate the output.
+          sig { returns(T.nilable(String)) }
+          attr_reader :system_prompt
+
+          sig { params(system_prompt: String).void }
+          attr_writer :system_prompt
+
+          # The user prompt used to generate the output.
+          sig { returns(T.nilable(String)) }
+          attr_reader :user_prompt
+
+          sig { params(user_prompt: String).void }
+          attr_writer :user_prompt
+
+          # A dictionary of inputs sent to the LLM to generate output. The dictionary must
+          # contain at least a `user_prompt` field or a `system_prompt` field. For
+          # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
+          sig do
+            params(
+              ground_truth: String,
+              system_prompt: String,
+              user_prompt: String
+            ).returns(T.attached_class)
+          end
+          def self.new(
+            # The ground truth for evaluating Ground Truth Adherence guardrail.
+            ground_truth: nil,
+            # The system prompt used to generate the output.
+            system_prompt: nil,
+            # The user prompt used to generate the output.
+            user_prompt: nil
+          )
+          end
+
+          sig do
+            override.returns(
+              {
+                ground_truth: String,
+                system_prompt: String,
+                user_prompt: String
+              }
+            )
+          end
+          def to_hash
+          end
+        end
+
+        # Run mode for the evaluation. The run mode allows the user to optimize for speed,
+        # accuracy, and cost by determining which models are used to evaluate the event.
+        module RunMode
+          extend Deeprails::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                Deeprails::MonitorDetailResponse::Evaluation::RunMode
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          PRECISION_PLUS =
+            T.let(
+              :precision_plus,
+              Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
+            )
+          PRECISION =
+            T.let(
+              :precision,
+              Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
+            )
+          SMART =
+            T.let(
+              :smart,
+              Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
+            )
+          ECONOMY =
+            T.let(
+              :economy,
+              Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                Deeprails::MonitorDetailResponse::Evaluation::RunMode::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+
+        module GuardrailMetric
+          extend Deeprails::Internal::Type::Enum
+
+          TaggedSymbol =
+            T.type_alias do
+              T.all(
+                Symbol,
+                Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric
+              )
+            end
+          OrSymbol = T.type_alias { T.any(Symbol, String) }
+
+          CORRECTNESS =
+            T.let(
+              :correctness,
+              Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+            )
+          COMPLETENESS =
+            T.let(
+              :completeness,
+              Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+            )
+          INSTRUCTION_ADHERENCE =
+            T.let(
+              :instruction_adherence,
+              Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+            )
+          CONTEXT_ADHERENCE =
+            T.let(
+              :context_adherence,
+              Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+            )
+          GROUND_TRUTH_ADHERENCE =
+            T.let(
+              :ground_truth_adherence,
+              Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+            )
+          COMPREHENSIVE_SAFETY =
+            T.let(
+              :comprehensive_safety,
+              Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+            )
+
+          sig do
+            override.returns(
+              T::Array[
+                Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric::TaggedSymbol
+              ]
+            )
+          end
+          def self.values
+          end
+        end
+      end
+
       class Stats < Deeprails::Internal::Type::BaseModel
         OrHash =
           T.type_alias do
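The hunk above adds `Evaluation`, together with its `EvaluationStatus`, `ModelInput`, `RunMode`, and `GuardrailMetric` types, as a model nested inside `MonitorDetailResponse`. Because of the `OrHash` and `OrSymbol` aliases, plain hashes and symbols coerce at the boundary. The SDK normally builds these models when deserializing API responses, but a hand-constructed sketch with illustrative values would look like this:

    evaluation = Deeprails::MonitorDetailResponse::Evaluation.new(
      eval_id: "eval_123",           # illustrative ID
      evaluation_status: :completed, # EvaluationStatus::OrSymbol accepts a bare Symbol
      model_input: {                 # ModelInput::OrHash accepts a plain Hash
        user_prompt: "Summarize the quarterly report."
      },
      model_output: "The report shows revenue grew 12%...",
      run_mode: :smart,
      guardrail_metrics: %i[correctness completeness]
    )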
@@ -13,12 +13,6 @@ module Deeprails
 
   DefendUpdateWorkflowParams = Deeprails::Models::DefendUpdateWorkflowParams
 
-  EvaluateCreateParams = Deeprails::Models::EvaluateCreateParams
-
-  EvaluateRetrieveParams = Deeprails::Models::EvaluateRetrieveParams
-
-  Evaluation = Deeprails::Models::Evaluation
-
   MonitorCreateParams = Deeprails::Models::MonitorCreateParams
 
   MonitorDetailResponse = Deeprails::Models::MonitorDetailResponse
@@ -14,8 +14,6 @@ module Deeprails
 
     attr_reader monitor: Deeprails::Resources::Monitor
 
-    attr_reader evaluate: Deeprails::Resources::Evaluate
-
     private def auth_headers: -> ::Hash[String, String]
 
     def initialize: (
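This final RBS hunk drops the `evaluate` resource from the client, matching the removal of the `EvaluateCreateParams`, `EvaluateRetrieveParams`, and `Evaluation` aliases above. Code written against 0.8.0 needs roughly the following rename; the monitor-based replacement is an inference from the rest of this diff, not documented migration guidance:

    # 0.8.0 (no longer type-checks in 0.9.0):
    #   client.evaluate       # resource removed from the client
    #   Deeprails::Evaluation # top-level alias removed

    # 0.9.0: evaluations are read from a monitor's detail response.
    detail = client.monitor.retrieve("monitor_id") # call shape assumed
    evaluation = detail.evaluations&.first
    puts evaluation.evaluation_result.inspect if evaluation&.evaluation_result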