deeprails 0.8.0 → 0.9.0

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -1,402 +0,0 @@
1
- # typed: strong
2
-
3
- module Deeprails
4
- module Models
5
- class Evaluation < Deeprails::Internal::Type::BaseModel
6
- OrHash =
7
- T.type_alias do
8
- T.any(Deeprails::Evaluation, Deeprails::Internal::AnyHash)
9
- end
10
-
11
- # A unique evaluation ID.
12
- sig { returns(String) }
13
- attr_accessor :eval_id
14
-
15
- # Status of the evaluation.
16
- sig { returns(Deeprails::Evaluation::EvaluationStatus::TaggedSymbol) }
17
- attr_accessor :evaluation_status
18
-
19
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
20
- # contain at least a `user_prompt` field or a `system_prompt` field. For
21
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
22
- sig { returns(Deeprails::Evaluation::ModelInput) }
23
- attr_reader :model_input
24
-
25
- sig do
26
- params(model_input: Deeprails::Evaluation::ModelInput::OrHash).void
27
- end
28
- attr_writer :model_input
29
-
30
- # Output generated by the LLM to be evaluated.
31
- sig { returns(String) }
32
- attr_accessor :model_output
33
-
34
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
35
- # accuracy, and cost by determining which models are used to evaluate the event.
36
- sig { returns(Deeprails::Evaluation::RunMode::TaggedSymbol) }
37
- attr_accessor :run_mode
38
-
39
- # The time the evaluation was created in UTC.
40
- sig { returns(T.nilable(Time)) }
41
- attr_reader :created_at
42
-
43
- sig { params(created_at: Time).void }
44
- attr_writer :created_at
45
-
46
- # The time the evaluation completed in UTC.
47
- sig { returns(T.nilable(Time)) }
48
- attr_reader :end_timestamp
49
-
50
- sig { params(end_timestamp: Time).void }
51
- attr_writer :end_timestamp
52
-
53
- # Description of the error causing the evaluation to fail, if any.
54
- sig { returns(T.nilable(String)) }
55
- attr_reader :error_message
56
-
57
- sig { params(error_message: String).void }
58
- attr_writer :error_message
59
-
60
- # The time the error causing the evaluation to fail was recorded.
61
- sig { returns(T.nilable(Time)) }
62
- attr_reader :error_timestamp
63
-
64
- sig { params(error_timestamp: Time).void }
65
- attr_writer :error_timestamp
66
-
67
- # Evaluation result consisting of average scores and rationales for each of the
68
- # evaluated guardrail metrics.
69
- sig { returns(T.nilable(T::Hash[Symbol, T.anything])) }
70
- attr_reader :evaluation_result
71
-
72
- sig { params(evaluation_result: T::Hash[Symbol, T.anything]).void }
73
- attr_writer :evaluation_result
74
-
75
- # Total cost of the evaluation.
76
- sig { returns(T.nilable(Float)) }
77
- attr_reader :evaluation_total_cost
78
-
79
- sig { params(evaluation_total_cost: Float).void }
80
- attr_writer :evaluation_total_cost
81
-
82
- # An array of guardrail metrics that the model input and output pair will be
83
- # evaluated on.
84
- sig do
85
- returns(
86
- T.nilable(
87
- T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol]
88
- )
89
- )
90
- end
91
- attr_reader :guardrail_metrics
92
-
93
- sig do
94
- params(
95
- guardrail_metrics:
96
- T::Array[Deeprails::Evaluation::GuardrailMetric::OrSymbol]
97
- ).void
98
- end
99
- attr_writer :guardrail_metrics
100
-
101
- # Model ID used to generate the output, like `gpt-4o` or `o3`.
102
- sig { returns(T.nilable(String)) }
103
- attr_reader :model_used
104
-
105
- sig { params(model_used: String).void }
106
- attr_writer :model_used
107
-
108
- # The most recent time the evaluation was modified in UTC.
109
- sig { returns(T.nilable(Time)) }
110
- attr_reader :modified_at
111
-
112
- sig { params(modified_at: Time).void }
113
- attr_writer :modified_at
114
-
115
- # An optional, user-defined tag for the evaluation.
116
- sig { returns(T.nilable(String)) }
117
- attr_reader :nametag
118
-
119
- sig { params(nametag: String).void }
120
- attr_writer :nametag
121
-
122
- # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
123
- # completed `evaluation_status`.
124
- sig { returns(T.nilable(Integer)) }
125
- attr_reader :progress
126
-
127
- sig { params(progress: Integer).void }
128
- attr_writer :progress
129
-
130
- # The time the evaluation started in UTC.
131
- sig { returns(T.nilable(Time)) }
132
- attr_reader :start_timestamp
133
-
134
- sig { params(start_timestamp: Time).void }
135
- attr_writer :start_timestamp
136
-
137
- sig do
138
- params(
139
- eval_id: String,
140
- evaluation_status: Deeprails::Evaluation::EvaluationStatus::OrSymbol,
141
- model_input: Deeprails::Evaluation::ModelInput::OrHash,
142
- model_output: String,
143
- run_mode: Deeprails::Evaluation::RunMode::OrSymbol,
144
- created_at: Time,
145
- end_timestamp: Time,
146
- error_message: String,
147
- error_timestamp: Time,
148
- evaluation_result: T::Hash[Symbol, T.anything],
149
- evaluation_total_cost: Float,
150
- guardrail_metrics:
151
- T::Array[Deeprails::Evaluation::GuardrailMetric::OrSymbol],
152
- model_used: String,
153
- modified_at: Time,
154
- nametag: String,
155
- progress: Integer,
156
- start_timestamp: Time
157
- ).returns(T.attached_class)
158
- end
159
- def self.new(
160
- # A unique evaluation ID.
161
- eval_id:,
162
- # Status of the evaluation.
163
- evaluation_status:,
164
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
165
- # contain at least a `user_prompt` field or a `system_prompt` field. For
166
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
167
- model_input:,
168
- # Output generated by the LLM to be evaluated.
169
- model_output:,
170
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
171
- # accuracy, and cost by determining which models are used to evaluate the event.
172
- run_mode:,
173
- # The time the evaluation was created in UTC.
174
- created_at: nil,
175
- # The time the evaluation completed in UTC.
176
- end_timestamp: nil,
177
- # Description of the error causing the evaluation to fail, if any.
178
- error_message: nil,
179
- # The time the error causing the evaluation to fail was recorded.
180
- error_timestamp: nil,
181
- # Evaluation result consisting of average scores and rationales for each of the
182
- # evaluated guardrail metrics.
183
- evaluation_result: nil,
184
- # Total cost of the evaluation.
185
- evaluation_total_cost: nil,
186
- # An array of guardrail metrics that the model input and output pair will be
187
- # evaluated on.
188
- guardrail_metrics: nil,
189
- # Model ID used to generate the output, like `gpt-4o` or `o3`.
190
- model_used: nil,
191
- # The most recent time the evaluation was modified in UTC.
192
- modified_at: nil,
193
- # An optional, user-defined tag for the evaluation.
194
- nametag: nil,
195
- # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
196
- # completed `evaluation_status`.
197
- progress: nil,
198
- # The time the evaluation started in UTC.
199
- start_timestamp: nil
200
- )
201
- end
202
-
203
- sig do
204
- override.returns(
205
- {
206
- eval_id: String,
207
- evaluation_status:
208
- Deeprails::Evaluation::EvaluationStatus::TaggedSymbol,
209
- model_input: Deeprails::Evaluation::ModelInput,
210
- model_output: String,
211
- run_mode: Deeprails::Evaluation::RunMode::TaggedSymbol,
212
- created_at: Time,
213
- end_timestamp: Time,
214
- error_message: String,
215
- error_timestamp: Time,
216
- evaluation_result: T::Hash[Symbol, T.anything],
217
- evaluation_total_cost: Float,
218
- guardrail_metrics:
219
- T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol],
220
- model_used: String,
221
- modified_at: Time,
222
- nametag: String,
223
- progress: Integer,
224
- start_timestamp: Time
225
- }
226
- )
227
- end
228
- def to_hash
229
- end
230
-
231
- # Status of the evaluation.
232
- module EvaluationStatus
233
- extend Deeprails::Internal::Type::Enum
234
-
235
- TaggedSymbol =
236
- T.type_alias do
237
- T.all(Symbol, Deeprails::Evaluation::EvaluationStatus)
238
- end
239
- OrSymbol = T.type_alias { T.any(Symbol, String) }
240
-
241
- IN_PROGRESS =
242
- T.let(
243
- :in_progress,
244
- Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
245
- )
246
- COMPLETED =
247
- T.let(
248
- :completed,
249
- Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
250
- )
251
- CANCELED =
252
- T.let(
253
- :canceled,
254
- Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
255
- )
256
- QUEUED =
257
- T.let(:queued, Deeprails::Evaluation::EvaluationStatus::TaggedSymbol)
258
- FAILED =
259
- T.let(:failed, Deeprails::Evaluation::EvaluationStatus::TaggedSymbol)
260
-
261
- sig do
262
- override.returns(
263
- T::Array[Deeprails::Evaluation::EvaluationStatus::TaggedSymbol]
264
- )
265
- end
266
- def self.values
267
- end
268
- end
269
-
270
- class ModelInput < Deeprails::Internal::Type::BaseModel
271
- OrHash =
272
- T.type_alias do
273
- T.any(
274
- Deeprails::Evaluation::ModelInput,
275
- Deeprails::Internal::AnyHash
276
- )
277
- end
278
-
279
- # The ground truth for evaluating Ground Truth Adherence guardrail.
280
- sig { returns(T.nilable(String)) }
281
- attr_reader :ground_truth
282
-
283
- sig { params(ground_truth: String).void }
284
- attr_writer :ground_truth
285
-
286
- # The system prompt used to generate the output.
287
- sig { returns(T.nilable(String)) }
288
- attr_reader :system_prompt
289
-
290
- sig { params(system_prompt: String).void }
291
- attr_writer :system_prompt
292
-
293
- # The user prompt used to generate the output.
294
- sig { returns(T.nilable(String)) }
295
- attr_reader :user_prompt
296
-
297
- sig { params(user_prompt: String).void }
298
- attr_writer :user_prompt
299
-
300
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
301
- # contain at least a `user_prompt` field or a `system_prompt` field. For
302
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
303
- sig do
304
- params(
305
- ground_truth: String,
306
- system_prompt: String,
307
- user_prompt: String
308
- ).returns(T.attached_class)
309
- end
310
- def self.new(
311
- # The ground truth for evaluating Ground Truth Adherence guardrail.
312
- ground_truth: nil,
313
- # The system prompt used to generate the output.
314
- system_prompt: nil,
315
- # The user prompt used to generate the output.
316
- user_prompt: nil
317
- )
318
- end
319
-
320
- sig do
321
- override.returns(
322
- { ground_truth: String, system_prompt: String, user_prompt: String }
323
- )
324
- end
325
- def to_hash
326
- end
327
- end
328
-
329
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
330
- # accuracy, and cost by determining which models are used to evaluate the event.
331
- module RunMode
332
- extend Deeprails::Internal::Type::Enum
333
-
334
- TaggedSymbol =
335
- T.type_alias { T.all(Symbol, Deeprails::Evaluation::RunMode) }
336
- OrSymbol = T.type_alias { T.any(Symbol, String) }
337
-
338
- PRECISION_PLUS =
339
- T.let(:precision_plus, Deeprails::Evaluation::RunMode::TaggedSymbol)
340
- PRECISION =
341
- T.let(:precision, Deeprails::Evaluation::RunMode::TaggedSymbol)
342
- SMART = T.let(:smart, Deeprails::Evaluation::RunMode::TaggedSymbol)
343
- ECONOMY = T.let(:economy, Deeprails::Evaluation::RunMode::TaggedSymbol)
344
-
345
- sig do
346
- override.returns(
347
- T::Array[Deeprails::Evaluation::RunMode::TaggedSymbol]
348
- )
349
- end
350
- def self.values
351
- end
352
- end
353
-
354
- module GuardrailMetric
355
- extend Deeprails::Internal::Type::Enum
356
-
357
- TaggedSymbol =
358
- T.type_alias { T.all(Symbol, Deeprails::Evaluation::GuardrailMetric) }
359
- OrSymbol = T.type_alias { T.any(Symbol, String) }
360
-
361
- CORRECTNESS =
362
- T.let(
363
- :correctness,
364
- Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
365
- )
366
- COMPLETENESS =
367
- T.let(
368
- :completeness,
369
- Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
370
- )
371
- INSTRUCTION_ADHERENCE =
372
- T.let(
373
- :instruction_adherence,
374
- Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
375
- )
376
- CONTEXT_ADHERENCE =
377
- T.let(
378
- :context_adherence,
379
- Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
380
- )
381
- GROUND_TRUTH_ADHERENCE =
382
- T.let(
383
- :ground_truth_adherence,
384
- Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
385
- )
386
- COMPREHENSIVE_SAFETY =
387
- T.let(
388
- :comprehensive_safety,
389
- Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
390
- )
391
-
392
- sig do
393
- override.returns(
394
- T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol]
395
- )
396
- end
397
- def self.values
398
- end
399
- end
400
- end
401
- end
402
- end
@@ -1,66 +0,0 @@
1
- # typed: strong
2
-
3
- module Deeprails
4
- module Resources
5
- class Evaluate
6
- # Use this endpoint to evaluate a model's input and output pair against selected
7
- # guardrail metrics
8
- sig do
9
- params(
10
- model_input: Deeprails::EvaluateCreateParams::ModelInput::OrHash,
11
- model_output: String,
12
- run_mode: Deeprails::EvaluateCreateParams::RunMode::OrSymbol,
13
- guardrail_metrics:
14
- T::Array[
15
- Deeprails::EvaluateCreateParams::GuardrailMetric::OrSymbol
16
- ],
17
- model_used: String,
18
- nametag: String,
19
- request_options: Deeprails::RequestOptions::OrHash
20
- ).returns(Deeprails::Evaluation)
21
- end
22
- def create(
23
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
24
- # contain at least a `user_prompt` field or a `system_prompt` field. For
25
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
26
- model_input:,
27
- # Output generated by the LLM to be evaluated.
28
- model_output:,
29
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
30
- # accuracy, and cost by determining which models are used to evaluate the event.
31
- # Available run modes include `precision_plus`, `precision`, `smart`, and
32
- # `economy`. Defaults to `smart`.
33
- run_mode:,
34
- # An array of guardrail metrics that the model input and output pair will be
35
- # evaluated on. For non-enterprise users, these will be limited to the allowed
36
- # guardrail metrics.
37
- guardrail_metrics: nil,
38
- # Model ID used to generate the output, like `gpt-4o` or `o3`.
39
- model_used: nil,
40
- # An optional, user-defined tag for the evaluation.
41
- nametag: nil,
42
- request_options: {}
43
- )
44
- end
45
-
46
- # Use this endpoint to retrieve the evaluation record for a given evaluation ID
47
- sig do
48
- params(
49
- eval_id: String,
50
- request_options: Deeprails::RequestOptions::OrHash
51
- ).returns(Deeprails::Evaluation)
52
- end
53
- def retrieve(
54
- # The ID of the evaluation to retrieve.
55
- eval_id,
56
- request_options: {}
57
- )
58
- end
59
-
60
- # @api private
61
- sig { params(client: Deeprails::Client).returns(T.attached_class) }
62
- def self.new(client:)
63
- end
64
- end
65
- end
66
- end
@@ -1,122 +0,0 @@
1
- module Deeprails
2
- module Models
3
- type evaluate_create_params =
4
- {
5
- model_input: Deeprails::EvaluateCreateParams::ModelInput,
6
- model_output: String,
7
- run_mode: Deeprails::Models::EvaluateCreateParams::run_mode,
8
- guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric],
9
- model_used: String,
10
- nametag: String
11
- }
12
- & Deeprails::Internal::Type::request_parameters
13
-
14
- class EvaluateCreateParams < Deeprails::Internal::Type::BaseModel
15
- extend Deeprails::Internal::Type::RequestParameters::Converter
16
- include Deeprails::Internal::Type::RequestParameters
17
-
18
- attr_accessor model_input: Deeprails::EvaluateCreateParams::ModelInput
19
-
20
- attr_accessor model_output: String
21
-
22
- attr_accessor run_mode: Deeprails::Models::EvaluateCreateParams::run_mode
23
-
24
- attr_reader guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]?
25
-
26
- def guardrail_metrics=: (
27
- ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]
28
- ) -> ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]
29
-
30
- attr_reader model_used: String?
31
-
32
- def model_used=: (String) -> String
33
-
34
- attr_reader nametag: String?
35
-
36
- def nametag=: (String) -> String
37
-
38
- def initialize: (
39
- model_input: Deeprails::EvaluateCreateParams::ModelInput,
40
- model_output: String,
41
- run_mode: Deeprails::Models::EvaluateCreateParams::run_mode,
42
- ?guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric],
43
- ?model_used: String,
44
- ?nametag: String,
45
- ?request_options: Deeprails::request_opts
46
- ) -> void
47
-
48
- def to_hash: -> {
49
- model_input: Deeprails::EvaluateCreateParams::ModelInput,
50
- model_output: String,
51
- run_mode: Deeprails::Models::EvaluateCreateParams::run_mode,
52
- guardrail_metrics: ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric],
53
- model_used: String,
54
- nametag: String,
55
- request_options: Deeprails::RequestOptions
56
- }
57
-
58
- type model_input =
59
- { ground_truth: String, system_prompt: String, user_prompt: String }
60
-
61
- class ModelInput < Deeprails::Internal::Type::BaseModel
62
- attr_reader ground_truth: String?
63
-
64
- def ground_truth=: (String) -> String
65
-
66
- attr_reader system_prompt: String?
67
-
68
- def system_prompt=: (String) -> String
69
-
70
- attr_reader user_prompt: String?
71
-
72
- def user_prompt=: (String) -> String
73
-
74
- def initialize: (
75
- ?ground_truth: String,
76
- ?system_prompt: String,
77
- ?user_prompt: String
78
- ) -> void
79
-
80
- def to_hash: -> {
81
- ground_truth: String,
82
- system_prompt: String,
83
- user_prompt: String
84
- }
85
- end
86
-
87
- type run_mode = :precision_plus | :precision | :smart | :economy
88
-
89
- module RunMode
90
- extend Deeprails::Internal::Type::Enum
91
-
92
- PRECISION_PLUS: :precision_plus
93
- PRECISION: :precision
94
- SMART: :smart
95
- ECONOMY: :economy
96
-
97
- def self?.values: -> ::Array[Deeprails::Models::EvaluateCreateParams::run_mode]
98
- end
99
-
100
- type guardrail_metric =
101
- :correctness
102
- | :completeness
103
- | :instruction_adherence
104
- | :context_adherence
105
- | :ground_truth_adherence
106
- | :comprehensive_safety
107
-
108
- module GuardrailMetric
109
- extend Deeprails::Internal::Type::Enum
110
-
111
- CORRECTNESS: :correctness
112
- COMPLETENESS: :completeness
113
- INSTRUCTION_ADHERENCE: :instruction_adherence
114
- CONTEXT_ADHERENCE: :context_adherence
115
- GROUND_TRUTH_ADHERENCE: :ground_truth_adherence
116
- COMPREHENSIVE_SAFETY: :comprehensive_safety
117
-
118
- def self?.values: -> ::Array[Deeprails::Models::EvaluateCreateParams::guardrail_metric]
119
- end
120
- end
121
- end
122
- end
@@ -1,15 +0,0 @@
1
- module Deeprails
2
- module Models
3
- type evaluate_retrieve_params =
4
- { } & Deeprails::Internal::Type::request_parameters
5
-
6
- class EvaluateRetrieveParams < Deeprails::Internal::Type::BaseModel
7
- extend Deeprails::Internal::Type::RequestParameters::Converter
8
- include Deeprails::Internal::Type::RequestParameters
9
-
10
- def initialize: (?request_options: Deeprails::request_opts) -> void
11
-
12
- def to_hash: -> { request_options: Deeprails::RequestOptions }
13
- end
14
- end
15
- end