deeprails 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,233 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Deeprails
4
- module Models
5
- # @see Deeprails::Resources::Evaluate#create
6
- class Evaluation < Deeprails::Internal::Type::BaseModel
7
- # @!attribute eval_id
8
- # A unique evaluation ID.
9
- #
10
- # @return [String]
11
- required :eval_id, String
12
-
13
- # @!attribute evaluation_status
14
- # Status of the evaluation.
15
- #
16
- # @return [Symbol, Deeprails::Models::Evaluation::EvaluationStatus]
17
- required :evaluation_status, enum: -> { Deeprails::Evaluation::EvaluationStatus }
18
-
19
- # @!attribute model_input
20
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
21
- # contain at least a `user_prompt` field or a `system_prompt` field. For
22
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
23
- #
24
- # @return [Deeprails::Models::Evaluation::ModelInput]
25
- required :model_input, -> { Deeprails::Evaluation::ModelInput }
26
-
27
- # @!attribute model_output
28
- # Output generated by the LLM to be evaluated.
29
- #
30
- # @return [String]
31
- required :model_output, String
32
-
33
- # @!attribute run_mode
34
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
35
- # accuracy, and cost by determining which models are used to evaluate the event.
36
- #
37
- # @return [Symbol, Deeprails::Models::Evaluation::RunMode]
38
- required :run_mode, enum: -> { Deeprails::Evaluation::RunMode }
39
-
40
- # @!attribute created_at
41
- # The time the evaluation was created in UTC.
42
- #
43
- # @return [Time, nil]
44
- optional :created_at, Time
45
-
46
- # @!attribute end_timestamp
47
- # The time the evaluation completed in UTC.
48
- #
49
- # @return [Time, nil]
50
- optional :end_timestamp, Time
51
-
52
- # @!attribute error_message
53
- # Description of the error causing the evaluation to fail, if any.
54
- #
55
- # @return [String, nil]
56
- optional :error_message, String
57
-
58
- # @!attribute error_timestamp
59
- # The time the error causing the evaluation to fail was recorded.
60
- #
61
- # @return [Time, nil]
62
- optional :error_timestamp, Time
63
-
64
- # @!attribute evaluation_result
65
- # Evaluation result consisting of average scores and rationales for each of the
66
- # evaluated guardrail metrics.
67
- #
68
- # @return [Hash{Symbol=>Object}, nil]
69
- optional :evaluation_result, Deeprails::Internal::Type::HashOf[Deeprails::Internal::Type::Unknown]
70
-
71
- # @!attribute evaluation_total_cost
72
- # Total cost of the evaluation.
73
- #
74
- # @return [Float, nil]
75
- optional :evaluation_total_cost, Float
76
-
77
- # @!attribute guardrail_metrics
78
- # An array of guardrail metrics that the model input and output pair will be
79
- # evaluated on.
80
- #
81
- # @return [Array<Symbol, Deeprails::Models::Evaluation::GuardrailMetric>, nil]
82
- optional :guardrail_metrics,
83
- -> { Deeprails::Internal::Type::ArrayOf[enum: Deeprails::Evaluation::GuardrailMetric] }
84
-
85
- # @!attribute model_used
86
- # Model ID used to generate the output, like `gpt-4o` or `o3`.
87
- #
88
- # @return [String, nil]
89
- optional :model_used, String
90
-
91
- # @!attribute modified_at
92
- # The most recent time the evaluation was modified in UTC.
93
- #
94
- # @return [Time, nil]
95
- optional :modified_at, Time
96
-
97
- # @!attribute nametag
98
- # An optional, user-defined tag for the evaluation.
99
- #
100
- # @return [String, nil]
101
- optional :nametag, String
102
-
103
- # @!attribute progress
104
- # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
105
- # completed `evaluation_status`.
106
- #
107
- # @return [Integer, nil]
108
- optional :progress, Integer
109
-
110
- # @!attribute start_timestamp
111
- # The time the evaluation started in UTC.
112
- #
113
- # @return [Time, nil]
114
- optional :start_timestamp, Time
115
-
116
- # @!method initialize(eval_id:, evaluation_status:, model_input:, model_output:, run_mode:, created_at: nil, end_timestamp: nil, error_message: nil, error_timestamp: nil, evaluation_result: nil, evaluation_total_cost: nil, guardrail_metrics: nil, model_used: nil, modified_at: nil, nametag: nil, progress: nil, start_timestamp: nil)
117
- # Some parameter documentations has been truncated, see
118
- # {Deeprails::Models::Evaluation} for more details.
119
- #
120
- # @param eval_id [String] A unique evaluation ID.
121
- #
122
- # @param evaluation_status [Symbol, Deeprails::Models::Evaluation::EvaluationStatus] Status of the evaluation.
123
- #
124
- # @param model_input [Deeprails::Models::Evaluation::ModelInput] A dictionary of inputs sent to the LLM to generate output. The dictionary must c
125
- #
126
- # @param model_output [String] Output generated by the LLM to be evaluated.
127
- #
128
- # @param run_mode [Symbol, Deeprails::Models::Evaluation::RunMode] Run mode for the evaluation. The run mode allows the user to optimize for speed
129
- #
130
- # @param created_at [Time] The time the evaluation was created in UTC.
131
- #
132
- # @param end_timestamp [Time] The time the evaluation completed in UTC.
133
- #
134
- # @param error_message [String] Description of the error causing the evaluation to fail, if any.
135
- #
136
- # @param error_timestamp [Time] The time the error causing the evaluation to fail was recorded.
137
- #
138
- # @param evaluation_result [Hash{Symbol=>Object}] Evaluation result consisting of average scores and rationales for each of the ev
139
- #
140
- # @param evaluation_total_cost [Float] Total cost of the evaluation.
141
- #
142
- # @param guardrail_metrics [Array<Symbol, Deeprails::Models::Evaluation::GuardrailMetric>] An array of guardrail metrics that the model input and output pair will be evalu
143
- #
144
- # @param model_used [String] Model ID used to generate the output, like `gpt-4o` or `o3`.
145
- #
146
- # @param modified_at [Time] The most recent time the evaluation was modified in UTC.
147
- #
148
- # @param nametag [String] An optional, user-defined tag for the evaluation.
149
- #
150
- # @param progress [Integer] Evaluation progress. Values range between 0 and 100; 100 corresponds to a compl
151
- #
152
- # @param start_timestamp [Time] The time the evaluation started in UTC.
153
-
154
- # Status of the evaluation.
155
- #
156
- # @see Deeprails::Models::Evaluation#evaluation_status
157
- module EvaluationStatus
158
- extend Deeprails::Internal::Type::Enum
159
-
160
- IN_PROGRESS = :in_progress
161
- COMPLETED = :completed
162
- CANCELED = :canceled
163
- QUEUED = :queued
164
- FAILED = :failed
165
-
166
- # @!method self.values
167
- # @return [Array<Symbol>]
168
- end
169
-
170
- # @see Deeprails::Models::Evaluation#model_input
171
- class ModelInput < Deeprails::Internal::Type::BaseModel
172
- # @!attribute ground_truth
173
- # The ground truth for evaluating Ground Truth Adherence guardrail.
174
- #
175
- # @return [String, nil]
176
- optional :ground_truth, String
177
-
178
- # @!attribute system_prompt
179
- # The system prompt used to generate the output.
180
- #
181
- # @return [String, nil]
182
- optional :system_prompt, String
183
-
184
- # @!attribute user_prompt
185
- # The user prompt used to generate the output.
186
- #
187
- # @return [String, nil]
188
- optional :user_prompt, String
189
-
190
- # @!method initialize(ground_truth: nil, system_prompt: nil, user_prompt: nil)
191
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
192
- # contain at least a `user_prompt` field or a `system_prompt` field. For
193
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
194
- #
195
- # @param ground_truth [String] The ground truth for evaluating Ground Truth Adherence guardrail.
196
- #
197
- # @param system_prompt [String] The system prompt used to generate the output.
198
- #
199
- # @param user_prompt [String] The user prompt used to generate the output.
200
- end
201
-
202
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
203
- # accuracy, and cost by determining which models are used to evaluate the event.
204
- #
205
- # @see Deeprails::Models::Evaluation#run_mode
206
- module RunMode
207
- extend Deeprails::Internal::Type::Enum
208
-
209
- PRECISION_PLUS = :precision_plus
210
- PRECISION = :precision
211
- SMART = :smart
212
- ECONOMY = :economy
213
-
214
- # @!method self.values
215
- # @return [Array<Symbol>]
216
- end
217
-
218
- module GuardrailMetric
219
- extend Deeprails::Internal::Type::Enum
220
-
221
- CORRECTNESS = :correctness
222
- COMPLETENESS = :completeness
223
- INSTRUCTION_ADHERENCE = :instruction_adherence
224
- CONTEXT_ADHERENCE = :context_adherence
225
- GROUND_TRUTH_ADHERENCE = :ground_truth_adherence
226
- COMPREHENSIVE_SAFETY = :comprehensive_safety
227
-
228
- # @!method self.values
229
- # @return [Array<Symbol>]
230
- end
231
- end
232
- end
233
- end
@@ -1,70 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Deeprails
4
- module Resources
5
- class Evaluate
6
- # Some parameter documentations has been truncated, see
7
- # {Deeprails::Models::EvaluateCreateParams} for more details.
8
- #
9
- # Use this endpoint to evaluate a model's input and output pair against selected
10
- # guardrail metrics
11
- #
12
- # @overload create(model_input:, model_output:, run_mode:, guardrail_metrics: nil, model_used: nil, nametag: nil, request_options: {})
13
- #
14
- # @param model_input [Deeprails::Models::EvaluateCreateParams::ModelInput] A dictionary of inputs sent to the LLM to generate output. The dictionary must c
15
- #
16
- # @param model_output [String] Output generated by the LLM to be evaluated.
17
- #
18
- # @param run_mode [Symbol, Deeprails::Models::EvaluateCreateParams::RunMode] Run mode for the evaluation. The run mode allows the user to optimize for speed
19
- #
20
- # @param guardrail_metrics [Array<Symbol, Deeprails::Models::EvaluateCreateParams::GuardrailMetric>] An array of guardrail metrics that the model input and output pair will be evalu
21
- #
22
- # @param model_used [String] Model ID used to generate the output, like `gpt-4o` or `o3`.
23
- #
24
- # @param nametag [String] An optional, user-defined tag for the evaluation.
25
- #
26
- # @param request_options [Deeprails::RequestOptions, Hash{Symbol=>Object}, nil]
27
- #
28
- # @return [Deeprails::Models::Evaluation]
29
- #
30
- # @see Deeprails::Models::EvaluateCreateParams
31
- def create(params)
32
- parsed, options = Deeprails::EvaluateCreateParams.dump_request(params)
33
- @client.request(
34
- method: :post,
35
- path: "evaluate",
36
- body: parsed,
37
- model: Deeprails::Evaluation,
38
- options: options
39
- )
40
- end
41
-
42
- # Use this endpoint to retrieve the evaluation record for a given evaluation ID
43
- #
44
- # @overload retrieve(eval_id, request_options: {})
45
- #
46
- # @param eval_id [String] The ID of the evaluation to retrieve.
47
- #
48
- # @param request_options [Deeprails::RequestOptions, Hash{Symbol=>Object}, nil]
49
- #
50
- # @return [Deeprails::Models::Evaluation]
51
- #
52
- # @see Deeprails::Models::EvaluateRetrieveParams
53
- def retrieve(eval_id, params = {})
54
- @client.request(
55
- method: :get,
56
- path: ["evaluate/%1$s", eval_id],
57
- model: Deeprails::Evaluation,
58
- options: params[:request_options]
59
- )
60
- end
61
-
62
- # @api private
63
- #
64
- # @param client [Deeprails::Client]
65
- def initialize(client:)
66
- @client = client
67
- end
68
- end
69
- end
70
- end
@@ -1,280 +0,0 @@
1
- # typed: strong
2
-
3
- module Deeprails
4
- module Models
5
- class EvaluateCreateParams < Deeprails::Internal::Type::BaseModel
6
- extend Deeprails::Internal::Type::RequestParameters::Converter
7
- include Deeprails::Internal::Type::RequestParameters
8
-
9
- OrHash =
10
- T.type_alias do
11
- T.any(Deeprails::EvaluateCreateParams, Deeprails::Internal::AnyHash)
12
- end
13
-
14
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
15
- # contain at least a `user_prompt` field or a `system_prompt` field. For
16
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
17
- sig { returns(Deeprails::EvaluateCreateParams::ModelInput) }
18
- attr_reader :model_input
19
-
20
- sig do
21
- params(
22
- model_input: Deeprails::EvaluateCreateParams::ModelInput::OrHash
23
- ).void
24
- end
25
- attr_writer :model_input
26
-
27
- # Output generated by the LLM to be evaluated.
28
- sig { returns(String) }
29
- attr_accessor :model_output
30
-
31
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
32
- # accuracy, and cost by determining which models are used to evaluate the event.
33
- # Available run modes include `precision_plus`, `precision`, `smart`, and
34
- # `economy`. Defaults to `smart`.
35
- sig { returns(Deeprails::EvaluateCreateParams::RunMode::OrSymbol) }
36
- attr_accessor :run_mode
37
-
38
- # An array of guardrail metrics that the model input and output pair will be
39
- # evaluated on. For non-enterprise users, these will be limited to the allowed
40
- # guardrail metrics.
41
- sig do
42
- returns(
43
- T.nilable(
44
- T::Array[Deeprails::EvaluateCreateParams::GuardrailMetric::OrSymbol]
45
- )
46
- )
47
- end
48
- attr_reader :guardrail_metrics
49
-
50
- sig do
51
- params(
52
- guardrail_metrics:
53
- T::Array[Deeprails::EvaluateCreateParams::GuardrailMetric::OrSymbol]
54
- ).void
55
- end
56
- attr_writer :guardrail_metrics
57
-
58
- # Model ID used to generate the output, like `gpt-4o` or `o3`.
59
- sig { returns(T.nilable(String)) }
60
- attr_reader :model_used
61
-
62
- sig { params(model_used: String).void }
63
- attr_writer :model_used
64
-
65
- # An optional, user-defined tag for the evaluation.
66
- sig { returns(T.nilable(String)) }
67
- attr_reader :nametag
68
-
69
- sig { params(nametag: String).void }
70
- attr_writer :nametag
71
-
72
- sig do
73
- params(
74
- model_input: Deeprails::EvaluateCreateParams::ModelInput::OrHash,
75
- model_output: String,
76
- run_mode: Deeprails::EvaluateCreateParams::RunMode::OrSymbol,
77
- guardrail_metrics:
78
- T::Array[
79
- Deeprails::EvaluateCreateParams::GuardrailMetric::OrSymbol
80
- ],
81
- model_used: String,
82
- nametag: String,
83
- request_options: Deeprails::RequestOptions::OrHash
84
- ).returns(T.attached_class)
85
- end
86
- def self.new(
87
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
88
- # contain at least a `user_prompt` field or a `system_prompt` field. For
89
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
90
- model_input:,
91
- # Output generated by the LLM to be evaluated.
92
- model_output:,
93
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
94
- # accuracy, and cost by determining which models are used to evaluate the event.
95
- # Available run modes include `precision_plus`, `precision`, `smart`, and
96
- # `economy`. Defaults to `smart`.
97
- run_mode:,
98
- # An array of guardrail metrics that the model input and output pair will be
99
- # evaluated on. For non-enterprise users, these will be limited to the allowed
100
- # guardrail metrics.
101
- guardrail_metrics: nil,
102
- # Model ID used to generate the output, like `gpt-4o` or `o3`.
103
- model_used: nil,
104
- # An optional, user-defined tag for the evaluation.
105
- nametag: nil,
106
- request_options: {}
107
- )
108
- end
109
-
110
- sig do
111
- override.returns(
112
- {
113
- model_input: Deeprails::EvaluateCreateParams::ModelInput,
114
- model_output: String,
115
- run_mode: Deeprails::EvaluateCreateParams::RunMode::OrSymbol,
116
- guardrail_metrics:
117
- T::Array[
118
- Deeprails::EvaluateCreateParams::GuardrailMetric::OrSymbol
119
- ],
120
- model_used: String,
121
- nametag: String,
122
- request_options: Deeprails::RequestOptions
123
- }
124
- )
125
- end
126
- def to_hash
127
- end
128
-
129
- class ModelInput < Deeprails::Internal::Type::BaseModel
130
- OrHash =
131
- T.type_alias do
132
- T.any(
133
- Deeprails::EvaluateCreateParams::ModelInput,
134
- Deeprails::Internal::AnyHash
135
- )
136
- end
137
-
138
- # The ground truth for evaluating Ground Truth Adherence guardrail.
139
- sig { returns(T.nilable(String)) }
140
- attr_reader :ground_truth
141
-
142
- sig { params(ground_truth: String).void }
143
- attr_writer :ground_truth
144
-
145
- # The system prompt used to generate the output.
146
- sig { returns(T.nilable(String)) }
147
- attr_reader :system_prompt
148
-
149
- sig { params(system_prompt: String).void }
150
- attr_writer :system_prompt
151
-
152
- # The user prompt used to generate the output.
153
- sig { returns(T.nilable(String)) }
154
- attr_reader :user_prompt
155
-
156
- sig { params(user_prompt: String).void }
157
- attr_writer :user_prompt
158
-
159
- # A dictionary of inputs sent to the LLM to generate output. The dictionary must
160
- # contain at least a `user_prompt` field or a `system_prompt` field. For
161
- # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
162
- sig do
163
- params(
164
- ground_truth: String,
165
- system_prompt: String,
166
- user_prompt: String
167
- ).returns(T.attached_class)
168
- end
169
- def self.new(
170
- # The ground truth for evaluating Ground Truth Adherence guardrail.
171
- ground_truth: nil,
172
- # The system prompt used to generate the output.
173
- system_prompt: nil,
174
- # The user prompt used to generate the output.
175
- user_prompt: nil
176
- )
177
- end
178
-
179
- sig do
180
- override.returns(
181
- { ground_truth: String, system_prompt: String, user_prompt: String }
182
- )
183
- end
184
- def to_hash
185
- end
186
- end
187
-
188
- # Run mode for the evaluation. The run mode allows the user to optimize for speed,
189
- # accuracy, and cost by determining which models are used to evaluate the event.
190
- # Available run modes include `precision_plus`, `precision`, `smart`, and
191
- # `economy`. Defaults to `smart`.
192
- module RunMode
193
- extend Deeprails::Internal::Type::Enum
194
-
195
- TaggedSymbol =
196
- T.type_alias do
197
- T.all(Symbol, Deeprails::EvaluateCreateParams::RunMode)
198
- end
199
- OrSymbol = T.type_alias { T.any(Symbol, String) }
200
-
201
- PRECISION_PLUS =
202
- T.let(
203
- :precision_plus,
204
- Deeprails::EvaluateCreateParams::RunMode::TaggedSymbol
205
- )
206
- PRECISION =
207
- T.let(
208
- :precision,
209
- Deeprails::EvaluateCreateParams::RunMode::TaggedSymbol
210
- )
211
- SMART =
212
- T.let(:smart, Deeprails::EvaluateCreateParams::RunMode::TaggedSymbol)
213
- ECONOMY =
214
- T.let(
215
- :economy,
216
- Deeprails::EvaluateCreateParams::RunMode::TaggedSymbol
217
- )
218
-
219
- sig do
220
- override.returns(
221
- T::Array[Deeprails::EvaluateCreateParams::RunMode::TaggedSymbol]
222
- )
223
- end
224
- def self.values
225
- end
226
- end
227
-
228
- module GuardrailMetric
229
- extend Deeprails::Internal::Type::Enum
230
-
231
- TaggedSymbol =
232
- T.type_alias do
233
- T.all(Symbol, Deeprails::EvaluateCreateParams::GuardrailMetric)
234
- end
235
- OrSymbol = T.type_alias { T.any(Symbol, String) }
236
-
237
- CORRECTNESS =
238
- T.let(
239
- :correctness,
240
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
241
- )
242
- COMPLETENESS =
243
- T.let(
244
- :completeness,
245
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
246
- )
247
- INSTRUCTION_ADHERENCE =
248
- T.let(
249
- :instruction_adherence,
250
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
251
- )
252
- CONTEXT_ADHERENCE =
253
- T.let(
254
- :context_adherence,
255
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
256
- )
257
- GROUND_TRUTH_ADHERENCE =
258
- T.let(
259
- :ground_truth_adherence,
260
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
261
- )
262
- COMPREHENSIVE_SAFETY =
263
- T.let(
264
- :comprehensive_safety,
265
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
266
- )
267
-
268
- sig do
269
- override.returns(
270
- T::Array[
271
- Deeprails::EvaluateCreateParams::GuardrailMetric::TaggedSymbol
272
- ]
273
- )
274
- end
275
- def self.values
276
- end
277
- end
278
- end
279
- end
280
- end
@@ -1,27 +0,0 @@
1
- # typed: strong
2
-
3
- module Deeprails
4
- module Models
5
- class EvaluateRetrieveParams < Deeprails::Internal::Type::BaseModel
6
- extend Deeprails::Internal::Type::RequestParameters::Converter
7
- include Deeprails::Internal::Type::RequestParameters
8
-
9
- OrHash =
10
- T.type_alias do
11
- T.any(Deeprails::EvaluateRetrieveParams, Deeprails::Internal::AnyHash)
12
- end
13
-
14
- sig do
15
- params(request_options: Deeprails::RequestOptions::OrHash).returns(
16
- T.attached_class
17
- )
18
- end
19
- def self.new(request_options: {})
20
- end
21
-
22
- sig { override.returns({ request_options: Deeprails::RequestOptions }) }
23
- def to_hash
24
- end
25
- end
26
- end
27
- end