deeprails 0.7.0 → 0.9.0

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +1 -1
  4. data/lib/deeprails/client.rb +0 -4
  5. data/lib/deeprails/internal/transport/pooled_net_requester.rb +30 -24
  6. data/lib/deeprails/models/defend_create_workflow_params.rb +4 -4
  7. data/lib/deeprails/models/defend_response.rb +4 -4
  8. data/lib/deeprails/models/defend_submit_event_params.rb +4 -4
  9. data/lib/deeprails/models/monitor_detail_response.rb +378 -0
  10. data/lib/deeprails/models/monitor_event_response.rb +41 -0
  11. data/lib/deeprails/models/monitor_response.rb +83 -0
  12. data/lib/deeprails/models/monitor_submit_event_params.rb +2 -2
  13. data/lib/deeprails/models.rb +4 -6
  14. data/lib/deeprails/resources/defend.rb +2 -2
  15. data/lib/deeprails/resources/monitor.rb +8 -8
  16. data/lib/deeprails/version.rb +1 -1
  17. data/lib/deeprails.rb +3 -7
  18. data/rbi/deeprails/client.rbi +0 -3
  19. data/rbi/deeprails/models/defend_create_workflow_params.rbi +6 -6
  20. data/rbi/deeprails/models/defend_response.rbi +6 -6
  21. data/rbi/deeprails/models/defend_submit_event_params.rbi +5 -5
  22. data/rbi/deeprails/models/monitor_detail_response.rbi +730 -0
  23. data/rbi/deeprails/models/monitor_event_response.rbi +64 -0
  24. data/rbi/deeprails/models/monitor_response.rbi +142 -0
  25. data/rbi/deeprails/models/monitor_submit_event_params.rbi +3 -3
  26. data/rbi/deeprails/models.rbi +4 -6
  27. data/rbi/deeprails/resources/defend.rbi +3 -3
  28. data/rbi/deeprails/resources/monitor.rbi +5 -5
  29. data/sig/deeprails/client.rbs +0 -2
  30. data/sig/deeprails/models/defend_create_workflow_params.rbs +5 -5
  31. data/sig/deeprails/models/defend_response.rbs +5 -5
  32. data/sig/deeprails/models/monitor_detail_response.rbs +335 -0
  33. data/sig/deeprails/models/monitor_event_response.rbs +37 -0
  34. data/sig/deeprails/models/monitor_response.rbs +73 -0
  35. data/sig/deeprails/models.rbs +4 -6
  36. data/sig/deeprails/resources/defend.rbs +1 -1
  37. data/sig/deeprails/resources/monitor.rbs +4 -4
  38. metadata +11 -23
  39. data/lib/deeprails/models/api_response.rb +0 -116
  40. data/lib/deeprails/models/evaluate_create_params.rb +0 -134
  41. data/lib/deeprails/models/evaluate_retrieve_params.rb +0 -14
  42. data/lib/deeprails/models/evaluation.rb +0 -233
  43. data/lib/deeprails/models/monitor_retrieve_response.rb +0 -183
  44. data/lib/deeprails/models/monitor_submit_event_response.rb +0 -74
  45. data/lib/deeprails/resources/evaluate.rb +0 -70
  46. data/rbi/deeprails/models/api_response.rbi +0 -201
  47. data/rbi/deeprails/models/evaluate_create_params.rbi +0 -280
  48. data/rbi/deeprails/models/evaluate_retrieve_params.rbi +0 -27
  49. data/rbi/deeprails/models/evaluation.rbi +0 -402
  50. data/rbi/deeprails/models/monitor_retrieve_response.rbi +0 -333
  51. data/rbi/deeprails/models/monitor_submit_event_response.rbi +0 -131
  52. data/rbi/deeprails/resources/evaluate.rbi +0 -66
  53. data/sig/deeprails/models/api_response.rbs +0 -100
  54. data/sig/deeprails/models/evaluate_create_params.rbs +0 -122
  55. data/sig/deeprails/models/evaluate_retrieve_params.rbs +0 -15
  56. data/sig/deeprails/models/evaluation.rbs +0 -204
  57. data/sig/deeprails/models/monitor_retrieve_response.rbs +0 -167
  58. data/sig/deeprails/models/monitor_submit_event_response.rbs +0 -70
  59. data/sig/deeprails/resources/evaluate.rbs +0 -22
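Taken together, the removals in entries 39 through 59 drop the `evaluate` resource and its `Evaluation` and `APIResponse` models, while the monitor responses are reshaped (`monitor_retrieve_response` and `monitor_submit_event_response` give way to the new `monitor_response`, `monitor_detail_response`, and `monitor_event_response` models), so upgrading across this range is a breaking change for code that still calls the removed surface. A minimal Gemfile sketch, assuming Bundler and offered purely as an illustration, for consumers who want to hold the 0.7.x line until they have migrated:

# Gemfile (illustrative): stay on the last release that still ships the
# `evaluate` resource; relax the constraint once callers have moved to the
# monitor models introduced in 0.9.0.
source "https://rubygems.org"

gem "deeprails", "~> 0.7.0"

The `~> 0.7.0` constraint allows 0.7.x patch releases but blocks 0.8 and 0.9, which is the usual Bundler idiom for deferring a breaking upgrade.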
data/rbi/deeprails/models/evaluation.rbi +0 -402
@@ -1,402 +0,0 @@
-# typed: strong
-
-module Deeprails
-  module Models
-    class Evaluation < Deeprails::Internal::Type::BaseModel
-      OrHash =
-        T.type_alias do
-          T.any(Deeprails::Evaluation, Deeprails::Internal::AnyHash)
-        end
-
-      # A unique evaluation ID.
-      sig { returns(String) }
-      attr_accessor :eval_id
-
-      # Status of the evaluation.
-      sig { returns(Deeprails::Evaluation::EvaluationStatus::TaggedSymbol) }
-      attr_accessor :evaluation_status
-
-      # A dictionary of inputs sent to the LLM to generate output. The dictionary must
-      # contain at least `user_prompt` or `system_prompt` field. For
-      # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-      sig { returns(Deeprails::Evaluation::ModelInput) }
-      attr_reader :model_input
-
-      sig do
-        params(model_input: Deeprails::Evaluation::ModelInput::OrHash).void
-      end
-      attr_writer :model_input
-
-      # Output generated by the LLM to be evaluated.
-      sig { returns(String) }
-      attr_accessor :model_output
-
-      # Run mode for the evaluation. The run mode allows the user to optimize for speed,
-      # accuracy, and cost by determining which models are used to evaluate the event.
-      sig { returns(Deeprails::Evaluation::RunMode::TaggedSymbol) }
-      attr_accessor :run_mode
-
-      # The time the evaluation was created in UTC.
-      sig { returns(T.nilable(Time)) }
-      attr_reader :created_at
-
-      sig { params(created_at: Time).void }
-      attr_writer :created_at
-
-      # The time the evaluation completed in UTC.
-      sig { returns(T.nilable(Time)) }
-      attr_reader :end_timestamp
-
-      sig { params(end_timestamp: Time).void }
-      attr_writer :end_timestamp
-
-      # Description of the error causing the evaluation to fail, if any.
-      sig { returns(T.nilable(String)) }
-      attr_reader :error_message
-
-      sig { params(error_message: String).void }
-      attr_writer :error_message
-
-      # The time the error causing the evaluation to fail was recorded.
-      sig { returns(T.nilable(Time)) }
-      attr_reader :error_timestamp
-
-      sig { params(error_timestamp: Time).void }
-      attr_writer :error_timestamp
-
-      # Evaluation result consisting of average scores and rationales for each of the
-      # evaluated guardrail metrics.
-      sig { returns(T.nilable(T::Hash[Symbol, T.anything])) }
-      attr_reader :evaluation_result
-
-      sig { params(evaluation_result: T::Hash[Symbol, T.anything]).void }
-      attr_writer :evaluation_result
-
-      # Total cost of the evaluation.
-      sig { returns(T.nilable(Float)) }
-      attr_reader :evaluation_total_cost
-
-      sig { params(evaluation_total_cost: Float).void }
-      attr_writer :evaluation_total_cost
-
-      # An array of guardrail metrics that the model input and output pair will be
-      # evaluated on.
-      sig do
-        returns(
-          T.nilable(
-            T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol]
-          )
-        )
-      end
-      attr_reader :guardrail_metrics
-
-      sig do
-        params(
-          guardrail_metrics:
-            T::Array[Deeprails::Evaluation::GuardrailMetric::OrSymbol]
-        ).void
-      end
-      attr_writer :guardrail_metrics
-
-      # Model ID used to generate the output, like `gpt-4o` or `o3`.
-      sig { returns(T.nilable(String)) }
-      attr_reader :model_used
-
-      sig { params(model_used: String).void }
-      attr_writer :model_used
-
-      # The most recent time the evaluation was modified in UTC.
-      sig { returns(T.nilable(Time)) }
-      attr_reader :modified_at
-
-      sig { params(modified_at: Time).void }
-      attr_writer :modified_at
-
-      # An optional, user-defined tag for the evaluation.
-      sig { returns(T.nilable(String)) }
-      attr_reader :nametag
-
-      sig { params(nametag: String).void }
-      attr_writer :nametag
-
-      # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
-      # completed `evaluation_status`.
-      sig { returns(T.nilable(Integer)) }
-      attr_reader :progress
-
-      sig { params(progress: Integer).void }
-      attr_writer :progress
-
-      # The time the evaluation started in UTC.
-      sig { returns(T.nilable(Time)) }
-      attr_reader :start_timestamp
-
-      sig { params(start_timestamp: Time).void }
-      attr_writer :start_timestamp
-
-      sig do
-        params(
-          eval_id: String,
-          evaluation_status: Deeprails::Evaluation::EvaluationStatus::OrSymbol,
-          model_input: Deeprails::Evaluation::ModelInput::OrHash,
-          model_output: String,
-          run_mode: Deeprails::Evaluation::RunMode::OrSymbol,
-          created_at: Time,
-          end_timestamp: Time,
-          error_message: String,
-          error_timestamp: Time,
-          evaluation_result: T::Hash[Symbol, T.anything],
-          evaluation_total_cost: Float,
-          guardrail_metrics:
-            T::Array[Deeprails::Evaluation::GuardrailMetric::OrSymbol],
-          model_used: String,
-          modified_at: Time,
-          nametag: String,
-          progress: Integer,
-          start_timestamp: Time
-        ).returns(T.attached_class)
-      end
-      def self.new(
-        # A unique evaluation ID.
-        eval_id:,
-        # Status of the evaluation.
-        evaluation_status:,
-        # A dictionary of inputs sent to the LLM to generate output. The dictionary must
-        # contain at least `user_prompt` or `system_prompt` field. For
-        # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-        model_input:,
-        # Output generated by the LLM to be evaluated.
-        model_output:,
-        # Run mode for the evaluation. The run mode allows the user to optimize for speed,
-        # accuracy, and cost by determining which models are used to evaluate the event.
-        run_mode:,
-        # The time the evaluation was created in UTC.
-        created_at: nil,
-        # The time the evaluation completed in UTC.
-        end_timestamp: nil,
-        # Description of the error causing the evaluation to fail, if any.
-        error_message: nil,
-        # The time the error causing the evaluation to fail was recorded.
-        error_timestamp: nil,
-        # Evaluation result consisting of average scores and rationales for each of the
-        # evaluated guardrail metrics.
-        evaluation_result: nil,
-        # Total cost of the evaluation.
-        evaluation_total_cost: nil,
-        # An array of guardrail metrics that the model input and output pair will be
-        # evaluated on.
-        guardrail_metrics: nil,
-        # Model ID used to generate the output, like `gpt-4o` or `o3`.
-        model_used: nil,
-        # The most recent time the evaluation was modified in UTC.
-        modified_at: nil,
-        # An optional, user-defined tag for the evaluation.
-        nametag: nil,
-        # Evaluation progress. Values range between 0 and 100; 100 corresponds to a
-        # completed `evaluation_status`.
-        progress: nil,
-        # The time the evaluation started in UTC.
-        start_timestamp: nil
-      )
-      end
-
-      sig do
-        override.returns(
-          {
-            eval_id: String,
-            evaluation_status:
-              Deeprails::Evaluation::EvaluationStatus::TaggedSymbol,
-            model_input: Deeprails::Evaluation::ModelInput,
-            model_output: String,
-            run_mode: Deeprails::Evaluation::RunMode::TaggedSymbol,
-            created_at: Time,
-            end_timestamp: Time,
-            error_message: String,
-            error_timestamp: Time,
-            evaluation_result: T::Hash[Symbol, T.anything],
-            evaluation_total_cost: Float,
-            guardrail_metrics:
-              T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol],
-            model_used: String,
-            modified_at: Time,
-            nametag: String,
-            progress: Integer,
-            start_timestamp: Time
-          }
-        )
-      end
-      def to_hash
-      end
-
-      # Status of the evaluation.
-      module EvaluationStatus
-        extend Deeprails::Internal::Type::Enum
-
-        TaggedSymbol =
-          T.type_alias do
-            T.all(Symbol, Deeprails::Evaluation::EvaluationStatus)
-          end
-        OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-        IN_PROGRESS =
-          T.let(
-            :in_progress,
-            Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
-          )
-        COMPLETED =
-          T.let(
-            :completed,
-            Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
-          )
-        CANCELED =
-          T.let(
-            :canceled,
-            Deeprails::Evaluation::EvaluationStatus::TaggedSymbol
-          )
-        QUEUED =
-          T.let(:queued, Deeprails::Evaluation::EvaluationStatus::TaggedSymbol)
-        FAILED =
-          T.let(:failed, Deeprails::Evaluation::EvaluationStatus::TaggedSymbol)
-
-        sig do
-          override.returns(
-            T::Array[Deeprails::Evaluation::EvaluationStatus::TaggedSymbol]
-          )
-        end
-        def self.values
-        end
-      end
-
-      class ModelInput < Deeprails::Internal::Type::BaseModel
-        OrHash =
-          T.type_alias do
-            T.any(
-              Deeprails::Evaluation::ModelInput,
-              Deeprails::Internal::AnyHash
-            )
-          end
-
-        # The ground truth for evaluating Ground Truth Adherence guardrail.
-        sig { returns(T.nilable(String)) }
-        attr_reader :ground_truth
-
-        sig { params(ground_truth: String).void }
-        attr_writer :ground_truth
-
-        # The system prompt used to generate the output.
-        sig { returns(T.nilable(String)) }
-        attr_reader :system_prompt
-
-        sig { params(system_prompt: String).void }
-        attr_writer :system_prompt
-
-        # The user prompt used to generate the output.
-        sig { returns(T.nilable(String)) }
-        attr_reader :user_prompt
-
-        sig { params(user_prompt: String).void }
-        attr_writer :user_prompt
-
-        # A dictionary of inputs sent to the LLM to generate output. The dictionary must
-        # contain at least `user_prompt` or `system_prompt` field. For
-        # ground_truth_adherence guardrail metric, `ground_truth` should be provided.
-        sig do
-          params(
-            ground_truth: String,
-            system_prompt: String,
-            user_prompt: String
-          ).returns(T.attached_class)
-        end
-        def self.new(
-          # The ground truth for evaluating Ground Truth Adherence guardrail.
-          ground_truth: nil,
-          # The system prompt used to generate the output.
-          system_prompt: nil,
-          # The user prompt used to generate the output.
-          user_prompt: nil
-        )
-        end
-
-        sig do
-          override.returns(
-            { ground_truth: String, system_prompt: String, user_prompt: String }
-          )
-        end
-        def to_hash
-        end
-      end
-
-      # Run mode for the evaluation. The run mode allows the user to optimize for speed,
-      # accuracy, and cost by determining which models are used to evaluate the event.
-      module RunMode
-        extend Deeprails::Internal::Type::Enum
-
-        TaggedSymbol =
-          T.type_alias { T.all(Symbol, Deeprails::Evaluation::RunMode) }
-        OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-        PRECISION_PLUS =
-          T.let(:precision_plus, Deeprails::Evaluation::RunMode::TaggedSymbol)
-        PRECISION =
-          T.let(:precision, Deeprails::Evaluation::RunMode::TaggedSymbol)
-        SMART = T.let(:smart, Deeprails::Evaluation::RunMode::TaggedSymbol)
-        ECONOMY = T.let(:economy, Deeprails::Evaluation::RunMode::TaggedSymbol)
-
-        sig do
-          override.returns(
-            T::Array[Deeprails::Evaluation::RunMode::TaggedSymbol]
-          )
-        end
-        def self.values
-        end
-      end
-
-      module GuardrailMetric
-        extend Deeprails::Internal::Type::Enum
-
-        TaggedSymbol =
-          T.type_alias { T.all(Symbol, Deeprails::Evaluation::GuardrailMetric) }
-        OrSymbol = T.type_alias { T.any(Symbol, String) }
-
-        CORRECTNESS =
-          T.let(
-            :correctness,
-            Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
-          )
-        COMPLETENESS =
-          T.let(
-            :completeness,
-            Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
-          )
-        INSTRUCTION_ADHERENCE =
-          T.let(
-            :instruction_adherence,
-            Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
-          )
-        CONTEXT_ADHERENCE =
-          T.let(
-            :context_adherence,
-            Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
-          )
-        GROUND_TRUTH_ADHERENCE =
-          T.let(
-            :ground_truth_adherence,
-            Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
-          )
-        COMPREHENSIVE_SAFETY =
-          T.let(
-            :comprehensive_safety,
-            Deeprails::Evaluation::GuardrailMetric::TaggedSymbol
-          )
-
-        sig do
-          override.returns(
-            T::Array[Deeprails::Evaluation::GuardrailMetric::TaggedSymbol]
-          )
-        end
-        def self.values
-        end
-      end
-    end
-  end
-end
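For reference, the signatures in the deleted evaluation.rbi above show how 0.7.x code could have built and serialized this model. The sketch below is assembled only from those signatures; the literal values (the eval ID, prompt, and output text) are invented for illustration, and nothing here reflects the 0.9.0 API, which no longer ships this class.

require "deeprails"

# Hypothetical construction of the removed model, following the
# `def self.new` signature above: eval_id, evaluation_status, model_input,
# model_output, and run_mode are required; the remaining fields default to nil.
evaluation = Deeprails::Evaluation.new(
  eval_id: "eval_123",                # invented example ID
  evaluation_status: :completed,      # EvaluationStatus enum value
  model_input: Deeprails::Evaluation::ModelInput.new(
    user_prompt: "Summarize the quarterly report."
  ),
  model_output: "Revenue grew 12% quarter over quarter.",
  run_mode: :smart,                   # RunMode enum value
  guardrail_metrics: [:correctness, :completeness]
)

# `to_hash` is declared above to return the typed hash of all fields.
evaluation.to_hash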