deeprails 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/lib/deeprails/client.rb +0 -4
- data/lib/deeprails/models/monitor_detail_response.rb +231 -3
- data/lib/deeprails/models.rb +0 -6
- data/lib/deeprails/version.rb +1 -1
- data/lib/deeprails.rb +0 -4
- data/rbi/deeprails/client.rbi +0 -3
- data/rbi/deeprails/models/monitor_detail_response.rbi +483 -4
- data/rbi/deeprails/models.rbi +0 -6
- data/sig/deeprails/client.rbs +0 -2
- data/sig/deeprails/models/monitor_detail_response.rbs +207 -6
- data/sig/deeprails/models.rbs +0 -6
- metadata +2 -14
- data/lib/deeprails/models/evaluate_create_params.rb +0 -134
- data/lib/deeprails/models/evaluate_retrieve_params.rb +0 -14
- data/lib/deeprails/models/evaluation.rb +0 -233
- data/lib/deeprails/resources/evaluate.rb +0 -70
- data/rbi/deeprails/models/evaluate_create_params.rbi +0 -280
- data/rbi/deeprails/models/evaluate_retrieve_params.rbi +0 -27
- data/rbi/deeprails/models/evaluation.rbi +0 -402
- data/rbi/deeprails/resources/evaluate.rbi +0 -66
- data/sig/deeprails/models/evaluate_create_params.rbs +0 -122
- data/sig/deeprails/models/evaluate_retrieve_params.rbs +0 -15
- data/sig/deeprails/models/evaluation.rbs +0 -204
- data/sig/deeprails/resources/evaluate.rbs +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0db85ba4c5a974e1efaa1db27b1888cbbf8ae21c5b1d15b4d75733f662c42a8a
+  data.tar.gz: 1ff3fc484f0032e6dddae533e803d7f591d270527428abba892879e5110105d3
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 12ba483e35868225c32653c0f8d3d464944d29e76633f52616e2557a613355222f886caae2aa3c908d535d5e693c92349ec5a8ebe5404c82ecd8d156ade46a96
+  data.tar.gz: 68678894ea182b88ad6dd35b1cbedf1c2f86c210a7769180f2ac82d71f2687a1c96ecd8ca032e0541acd38bee83e0447cbb4fd5ff09f50d9c6fd8f8ea163ec0b
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
 # Changelog
 
+## 0.9.0 (2025-10-29)
+
+Full Changelog: [v0.8.0...v0.9.0](https://github.com/deeprails/deeprails-ruby-sdk/compare/v0.8.0...v0.9.0)
+
+### Features
+
+* **api:** remove evaluate api ([733236c](https://github.com/deeprails/deeprails-ruby-sdk/commit/733236c71a7bad030caea5ebe42a3c8061e05ec6))
+* **api:** remove evaluate references ([de622dd](https://github.com/deeprails/deeprails-ruby-sdk/commit/de622dd89f3c14f5db366bd425c1e4c68bc59886))
+
 ## 0.8.0 (2025-10-24)
 
 Full Changelog: [v0.7.0...v0.8.0](https://github.com/deeprails/deeprails-ruby-sdk/compare/v0.7.0...v0.8.0)
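The two `remove evaluate` commits above are the substance of this release: the standalone evaluate resource and its models are gone, and evaluation data now appears only as a nested model on monitor detail responses. The migration sketch below is not taken from the package itself; the client constructor arguments and the `monitor.retrieve` call are assumptions, and only the removed constants and the new nested `Evaluation` model are confirmed by this diff.

```ruby
require "deeprails"

# Assumption: the client takes an API key keyword; check the gem's README for the exact option.
client = Deeprails::Client.new(api_key: ENV["DEEPRAILS_API_KEY"])

# 0.8.0 surface that no longer exists in 0.9.0:
#   client.evaluate                          # Deeprails::Resources::Evaluate was removed
#   Deeprails::Models::Evaluation            # top-level model was removed
#   Deeprails::Models::EvaluateCreateParams  # removed
#   Deeprails::Models::EvaluateRetrieveParams # removed

# 0.9.0: evaluations are exposed through monitor detail responses.
# `retrieve` and its argument are hypothetical here; the diff only shows that the
# monitor resource remains and that MonitorDetailResponse#evaluations returns
# Array<Deeprails::Models::MonitorDetailResponse::Evaluation>.
detail = client.monitor.retrieve("monitor_id")
(detail.evaluations || []).each do |evaluation|
  puts "#{evaluation.eval_id}: #{evaluation.evaluation_status}"
end
```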
data/README.md
CHANGED
data/lib/deeprails/client.rb
CHANGED
@@ -24,9 +24,6 @@ module Deeprails
     # @return [Deeprails::Resources::Monitor]
     attr_reader :monitor
 
-    # @return [Deeprails::Resources::Evaluate]
-    attr_reader :evaluate
-
     # @api private
     #
     # @return [Hash{String=>String}]
@@ -76,7 +73,6 @@ module Deeprails
 
       @defend = Deeprails::Resources::Defend.new(client: self)
       @monitor = Deeprails::Resources::Monitor.new(client: self)
-      @evaluate = Deeprails::Resources::Evaluate.new(client: self)
     end
   end
 end
data/lib/deeprails/models/monitor_detail_response.rb
CHANGED

@@ -39,8 +39,9 @@ module Deeprails
       # An array of all evaluations performed by this monitor. Each one corresponds to a
       # separate monitor event.
       #
-      # @return [Array<Deeprails::Models::Evaluation>, nil]
-      optional :evaluations,
+      # @return [Array<Deeprails::Models::MonitorDetailResponse::Evaluation>, nil]
+      optional :evaluations,
+               -> { Deeprails::Internal::Type::ArrayOf[Deeprails::MonitorDetailResponse::Evaluation] }
 
       # @!attribute stats
       # Contains five fields used for stats of this monitor: total evaluations,
@@ -76,7 +77,7 @@ module Deeprails
       #
       # @param description [String] Description of this monitor.
       #
-      # @param evaluations [Array<Deeprails::Models::Evaluation>] An array of all evaluations performed by this monitor. Each one corresponds to
+      # @param evaluations [Array<Deeprails::Models::MonitorDetailResponse::Evaluation>] An array of all evaluations performed by this monitor. Each one corresponds to
       #
       # @param stats [Deeprails::Models::MonitorDetailResponse::Stats] Contains five fields used for stats of this monitor: total evaluations, complete
       #
@@ -98,6 +99,233 @@
         # @return [Array<Symbol>]
       end
 
+      class Evaluation < Deeprails::Internal::Type::BaseModel
+        # @!attribute eval_id
+        # A unique evaluation ID.
+        #
+        # @return [String]
+        required :eval_id, String
+
+        # @!attribute evaluation_status
+        # Status of the evaluation.
+        #
+        # @return [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::EvaluationStatus]
+        required :evaluation_status, enum: -> { Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus }
+
+        # @!attribute model_input
+        # A dictionary of inputs sent to the LLM to generate output. The dictionary must
+        # contain at least a `user_prompt` field or a `system_prompt` field. For
+        # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
+        #
+        # @return [Deeprails::Models::MonitorDetailResponse::Evaluation::ModelInput]
+        required :model_input, -> { Deeprails::MonitorDetailResponse::Evaluation::ModelInput }
+
+        # @!attribute model_output
+        # Output generated by the LLM to be evaluated.
+        #
+        # @return [String]
+        required :model_output, String
+
+        # @!attribute run_mode
+        # Run mode for the evaluation. The run mode allows the user to optimize for speed,
+        # accuracy, and cost by determining which models are used to evaluate the event.
+        #
+        # @return [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::RunMode]
+        required :run_mode, enum: -> { Deeprails::MonitorDetailResponse::Evaluation::RunMode }
+
+        # @!attribute created_at
+        # The time the evaluation was created in UTC.
+        #
+        # @return [Time, nil]
+        optional :created_at, Time
+
+        # @!attribute end_timestamp
+        # The time the evaluation completed in UTC.
+        #
+        # @return [Time, nil]
+        optional :end_timestamp, Time
+
+        # @!attribute error_message
+        # Description of the error causing the evaluation to fail, if any.
+        #
+        # @return [String, nil]
+        optional :error_message, String
+
+        # @!attribute error_timestamp
+        # The time the error causing the evaluation to fail was recorded.
+        #
+        # @return [Time, nil]
+        optional :error_timestamp, Time
+
+        # @!attribute evaluation_result
+        # Evaluation result consisting of average scores and rationales for each of the
+        # evaluated guardrail metrics.
+        #
+        # @return [Hash{Symbol=>Object}, nil]
+        optional :evaluation_result, Deeprails::Internal::Type::HashOf[Deeprails::Internal::Type::Unknown]
+
+        # @!attribute evaluation_total_cost
+        # Total cost of the evaluation.
+        #
+        # @return [Float, nil]
+        optional :evaluation_total_cost, Float
+
+        # @!attribute guardrail_metrics
+        # An array of guardrail metrics that the model input and output pair will be
+        # evaluated on.
+        #
+        # @return [Array<Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::GuardrailMetric>, nil]
+        optional :guardrail_metrics,
+                 -> { Deeprails::Internal::Type::ArrayOf[enum: Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric] }
+
+        # @!attribute model_used
+        # Model ID used to generate the output, like `gpt-4o` or `o3`.
+        #
+        # @return [String, nil]
+        optional :model_used, String
+
+        # @!attribute modified_at
+        # The most recent time the evaluation was modified in UTC.
+        #
+        # @return [Time, nil]
+        optional :modified_at, Time
+
+        # @!attribute nametag
+        # An optional, user-defined tag for the evaluation.
+        #
+        # @return [String, nil]
+        optional :nametag, String
+
+        # @!attribute progress
+        # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
+        # completed `evaluation_status`.
+        #
+        # @return [Integer, nil]
+        optional :progress, Integer
+
+        # @!attribute start_timestamp
+        # The time the evaluation started in UTC.
+        #
+        # @return [Time, nil]
+        optional :start_timestamp, Time
+
+        # @!method initialize(eval_id:, evaluation_status:, model_input:, model_output:, run_mode:, created_at: nil, end_timestamp: nil, error_message: nil, error_timestamp: nil, evaluation_result: nil, evaluation_total_cost: nil, guardrail_metrics: nil, model_used: nil, modified_at: nil, nametag: nil, progress: nil, start_timestamp: nil)
+        # Some parameter documentations has been truncated, see
+        # {Deeprails::Models::MonitorDetailResponse::Evaluation} for more details.
+        #
+        # @param eval_id [String] A unique evaluation ID.
+        #
+        # @param evaluation_status [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::EvaluationStatus] Status of the evaluation.
+        #
+        # @param model_input [Deeprails::Models::MonitorDetailResponse::Evaluation::ModelInput] A dictionary of inputs sent to the LLM to generate output. The dictionary must c
+        #
+        # @param model_output [String] Output generated by the LLM to be evaluated.
+        #
+        # @param run_mode [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::RunMode] Run mode for the evaluation. The run mode allows the user to optimize for speed
+        #
+        # @param created_at [Time] The time the evaluation was created in UTC.
+        #
+        # @param end_timestamp [Time] The time the evaluation completed in UTC.
+        #
+        # @param error_message [String] Description of the error causing the evaluation to fail, if any.
+        #
+        # @param error_timestamp [Time] The time the error causing the evaluation to fail was recorded.
+        #
+        # @param evaluation_result [Hash{Symbol=>Object}] Evaluation result consisting of average scores and rationales for each of the ev
+        #
+        # @param evaluation_total_cost [Float] Total cost of the evaluation.
+        #
+        # @param guardrail_metrics [Array<Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::GuardrailMetric>] An array of guardrail metrics that the model input and output pair will be evalu
+        #
+        # @param model_used [String] Model ID used to generate the output, like `gpt-4o` or `o3`.
+        #
+        # @param modified_at [Time] The most recent time the evaluation was modified in UTC.
+        #
+        # @param nametag [String] An optional, user-defined tag for the evaluation.
+        #
+        # @param progress [Integer] Evaluation progress. Values range between 0 and 100; 100 corresponds to a compl
+        #
+        # @param start_timestamp [Time] The time the evaluation started in UTC.
+
+        # Status of the evaluation.
+        #
+        # @see Deeprails::Models::MonitorDetailResponse::Evaluation#evaluation_status
+        module EvaluationStatus
+          extend Deeprails::Internal::Type::Enum
+
+          IN_PROGRESS = :in_progress
+          COMPLETED = :completed
+          CANCELED = :canceled
+          QUEUED = :queued
+          FAILED = :failed
+
+          # @!method self.values
+          # @return [Array<Symbol>]
+        end
+
+        # @see Deeprails::Models::MonitorDetailResponse::Evaluation#model_input
+        class ModelInput < Deeprails::Internal::Type::BaseModel
+          # @!attribute ground_truth
+          # The ground truth for evaluating Ground Truth Adherence guardrail.
+          #
+          # @return [String, nil]
+          optional :ground_truth, String
+
+          # @!attribute system_prompt
+          # The system prompt used to generate the output.
+          #
+          # @return [String, nil]
+          optional :system_prompt, String
+
+          # @!attribute user_prompt
+          # The user prompt used to generate the output.
+          #
+          # @return [String, nil]
+          optional :user_prompt, String
+
+          # @!method initialize(ground_truth: nil, system_prompt: nil, user_prompt: nil)
+          # A dictionary of inputs sent to the LLM to generate output. The dictionary must
+          # contain at least a `user_prompt` field or a `system_prompt` field. For
+          # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
+          #
+          # @param ground_truth [String] The ground truth for evaluating Ground Truth Adherence guardrail.
+          #
+          # @param system_prompt [String] The system prompt used to generate the output.
+          #
+          # @param user_prompt [String] The user prompt used to generate the output.
+        end
+
+        # Run mode for the evaluation. The run mode allows the user to optimize for speed,
+        # accuracy, and cost by determining which models are used to evaluate the event.
+        #
+        # @see Deeprails::Models::MonitorDetailResponse::Evaluation#run_mode
+        module RunMode
+          extend Deeprails::Internal::Type::Enum
+
+          PRECISION_PLUS = :precision_plus
+          PRECISION = :precision
+          SMART = :smart
+          ECONOMY = :economy
+
+          # @!method self.values
+          # @return [Array<Symbol>]
+        end
+
+        module GuardrailMetric
+          extend Deeprails::Internal::Type::Enum
+
+          CORRECTNESS = :correctness
+          COMPLETENESS = :completeness
+          INSTRUCTION_ADHERENCE = :instruction_adherence
+          CONTEXT_ADHERENCE = :context_adherence
+          GROUND_TRUTH_ADHERENCE = :ground_truth_adherence
+          COMPREHENSIVE_SAFETY = :comprehensive_safety
+
+          # @!method self.values
+          # @return [Array<Symbol>]
+        end
+      end
+
       # @see Deeprails::Models::MonitorDetailResponse#stats
       class Stats < Deeprails::Internal::Type::BaseModel
         # @!attribute completed_evaluations
data/lib/deeprails/models.rb
CHANGED
@@ -51,12 +51,6 @@ module Deeprails
 
   DefendUpdateWorkflowParams = Deeprails::Models::DefendUpdateWorkflowParams
 
-  EvaluateCreateParams = Deeprails::Models::EvaluateCreateParams
-
-  EvaluateRetrieveParams = Deeprails::Models::EvaluateRetrieveParams
-
-  Evaluation = Deeprails::Models::Evaluation
-
   MonitorCreateParams = Deeprails::Models::MonitorCreateParams
 
   MonitorDetailResponse = Deeprails::Models::MonitorDetailResponse
data/lib/deeprails/version.rb
CHANGED
data/lib/deeprails.rb
CHANGED
@@ -56,9 +56,6 @@ require_relative "deeprails/models/defend_retrieve_event_params"
 require_relative "deeprails/models/defend_retrieve_workflow_params"
 require_relative "deeprails/models/defend_submit_event_params"
 require_relative "deeprails/models/defend_update_workflow_params"
-require_relative "deeprails/models/evaluate_create_params"
-require_relative "deeprails/models/evaluate_retrieve_params"
-require_relative "deeprails/models/evaluation"
 require_relative "deeprails/models/monitor_create_params"
 require_relative "deeprails/models/monitor_detail_response"
 require_relative "deeprails/models/monitor_event_response"
@@ -69,5 +66,4 @@ require_relative "deeprails/models/monitor_update_params"
 require_relative "deeprails/models/workflow_event_response"
 require_relative "deeprails/models"
 require_relative "deeprails/resources/defend"
-require_relative "deeprails/resources/evaluate"
 require_relative "deeprails/resources/monitor"
data/rbi/deeprails/client.rbi
CHANGED
@@ -19,9 +19,6 @@ module Deeprails
     sig { returns(Deeprails::Resources::Monitor) }
     attr_reader :monitor
 
-    sig { returns(Deeprails::Resources::Evaluate) }
-    attr_reader :evaluate
-
     # @api private
     sig { override.returns(T::Hash[String, String]) }
     private def auth_headers