deeprails 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +1 -1
- data/lib/deeprails/client.rb +0 -4
- data/lib/deeprails/internal/transport/pooled_net_requester.rb +30 -24
- data/lib/deeprails/models/defend_create_workflow_params.rb +4 -4
- data/lib/deeprails/models/defend_response.rb +4 -4
- data/lib/deeprails/models/defend_submit_event_params.rb +4 -4
- data/lib/deeprails/models/monitor_detail_response.rb +378 -0
- data/lib/deeprails/models/monitor_event_response.rb +41 -0
- data/lib/deeprails/models/monitor_response.rb +83 -0
- data/lib/deeprails/models/monitor_submit_event_params.rb +2 -2
- data/lib/deeprails/models.rb +4 -6
- data/lib/deeprails/resources/defend.rb +2 -2
- data/lib/deeprails/resources/monitor.rb +8 -8
- data/lib/deeprails/version.rb +1 -1
- data/lib/deeprails.rb +3 -7
- data/rbi/deeprails/client.rbi +0 -3
- data/rbi/deeprails/models/defend_create_workflow_params.rbi +6 -6
- data/rbi/deeprails/models/defend_response.rbi +6 -6
- data/rbi/deeprails/models/defend_submit_event_params.rbi +5 -5
- data/rbi/deeprails/models/monitor_detail_response.rbi +730 -0
- data/rbi/deeprails/models/monitor_event_response.rbi +64 -0
- data/rbi/deeprails/models/monitor_response.rbi +142 -0
- data/rbi/deeprails/models/monitor_submit_event_params.rbi +3 -3
- data/rbi/deeprails/models.rbi +4 -6
- data/rbi/deeprails/resources/defend.rbi +3 -3
- data/rbi/deeprails/resources/monitor.rbi +5 -5
- data/sig/deeprails/client.rbs +0 -2
- data/sig/deeprails/models/defend_create_workflow_params.rbs +5 -5
- data/sig/deeprails/models/defend_response.rbs +5 -5
- data/sig/deeprails/models/monitor_detail_response.rbs +335 -0
- data/sig/deeprails/models/monitor_event_response.rbs +37 -0
- data/sig/deeprails/models/monitor_response.rbs +73 -0
- data/sig/deeprails/models.rbs +4 -6
- data/sig/deeprails/resources/defend.rbs +1 -1
- data/sig/deeprails/resources/monitor.rbs +4 -4
- metadata +11 -23
- data/lib/deeprails/models/api_response.rb +0 -116
- data/lib/deeprails/models/evaluate_create_params.rb +0 -134
- data/lib/deeprails/models/evaluate_retrieve_params.rb +0 -14
- data/lib/deeprails/models/evaluation.rb +0 -233
- data/lib/deeprails/models/monitor_retrieve_response.rb +0 -183
- data/lib/deeprails/models/monitor_submit_event_response.rb +0 -74
- data/lib/deeprails/resources/evaluate.rb +0 -70
- data/rbi/deeprails/models/api_response.rbi +0 -201
- data/rbi/deeprails/models/evaluate_create_params.rbi +0 -280
- data/rbi/deeprails/models/evaluate_retrieve_params.rbi +0 -27
- data/rbi/deeprails/models/evaluation.rbi +0 -402
- data/rbi/deeprails/models/monitor_retrieve_response.rbi +0 -333
- data/rbi/deeprails/models/monitor_submit_event_response.rbi +0 -131
- data/rbi/deeprails/resources/evaluate.rbi +0 -66
- data/sig/deeprails/models/api_response.rbs +0 -100
- data/sig/deeprails/models/evaluate_create_params.rbs +0 -122
- data/sig/deeprails/models/evaluate_retrieve_params.rbs +0 -15
- data/sig/deeprails/models/evaluation.rbs +0 -204
- data/sig/deeprails/models/monitor_retrieve_response.rbs +0 -167
- data/sig/deeprails/models/monitor_submit_event_response.rbs +0 -70
- data/sig/deeprails/resources/evaluate.rbs +0 -22
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0db85ba4c5a974e1efaa1db27b1888cbbf8ae21c5b1d15b4d75733f662c42a8a
|
|
4
|
+
data.tar.gz: 1ff3fc484f0032e6dddae533e803d7f591d270527428abba892879e5110105d3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 12ba483e35868225c32653c0f8d3d464944d29e76633f52616e2557a613355222f886caae2aa3c908d535d5e693c92349ec5a8ebe5404c82ecd8d156ade46a96
|
|
7
|
+
data.tar.gz: 68678894ea182b88ad6dd35b1cbedf1c2f86c210a7769180f2ac82d71f2687a1c96ecd8ca032e0541acd38bee83e0447cbb4fd5ff09f50d9c6fd8f8ea163ec0b
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.9.0 (2025-10-29)
|
|
4
|
+
|
|
5
|
+
Full Changelog: [v0.8.0...v0.9.0](https://github.com/deeprails/deeprails-ruby-sdk/compare/v0.8.0...v0.9.0)
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
* **api:** remove evaluate api ([733236c](https://github.com/deeprails/deeprails-ruby-sdk/commit/733236c71a7bad030caea5ebe42a3c8061e05ec6))
|
|
10
|
+
* **api:** remove evaluate references ([de622dd](https://github.com/deeprails/deeprails-ruby-sdk/commit/de622dd89f3c14f5db366bd425c1e4c68bc59886))
|
|
11
|
+
|
|
12
|
+
## 0.8.0 (2025-10-24)
|
|
13
|
+
|
|
14
|
+
Full Changelog: [v0.7.0...v0.8.0](https://github.com/deeprails/deeprails-ruby-sdk/compare/v0.7.0...v0.8.0)
|
|
15
|
+
|
|
16
|
+
### Features
|
|
17
|
+
|
|
18
|
+
* **api:** remove apiresponse from monitor ([c26404c](https://github.com/deeprails/deeprails-ruby-sdk/commit/c26404cf6e750823780141362084731adf50b089))
|
|
19
|
+
* handle thread interrupts in the core HTTP client ([7c7e928](https://github.com/deeprails/deeprails-ruby-sdk/commit/7c7e928f1ec5bb401bce8200ec186e8c080de890))
|
|
20
|
+
|
|
3
21
|
## 0.7.0 (2025-10-22)
|
|
4
22
|
|
|
5
23
|
Full Changelog: [v0.6.0...v0.7.0](https://github.com/deeprails/deeprails-ruby-sdk/compare/v0.6.0...v0.7.0)
|
data/README.md
CHANGED
data/lib/deeprails/client.rb
CHANGED
|
@@ -24,9 +24,6 @@ module Deeprails
|
|
|
24
24
|
# @return [Deeprails::Resources::Monitor]
|
|
25
25
|
attr_reader :monitor
|
|
26
26
|
|
|
27
|
-
# @return [Deeprails::Resources::Evaluate]
|
|
28
|
-
attr_reader :evaluate
|
|
29
|
-
|
|
30
27
|
# @api private
|
|
31
28
|
#
|
|
32
29
|
# @return [Hash{String=>String}]
|
|
@@ -76,7 +73,6 @@ module Deeprails
|
|
|
76
73
|
|
|
77
74
|
@defend = Deeprails::Resources::Defend.new(client: self)
|
|
78
75
|
@monitor = Deeprails::Resources::Monitor.new(client: self)
|
|
79
|
-
@evaluate = Deeprails::Resources::Evaluate.new(client: self)
|
|
80
76
|
end
|
|
81
77
|
end
|
|
82
78
|
end
|
data/lib/deeprails/internal/transport/pooled_net_requester.rb
CHANGED
|
@@ -128,40 +128,48 @@ module Deeprails
|
|
|
128
128
|
url, deadline = request.fetch_values(:url, :deadline)
|
|
129
129
|
|
|
130
130
|
req = nil
|
|
131
|
-
eof = false
|
|
132
131
|
finished = false
|
|
133
|
-
closing = nil
|
|
134
132
|
|
|
135
133
|
# rubocop:disable Metrics/BlockLength
|
|
136
134
|
enum = Enumerator.new do |y|
|
|
137
135
|
next if finished
|
|
138
136
|
|
|
139
137
|
with_pool(url, deadline: deadline) do |conn|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
conn.start
|
|
148
|
-
end
|
|
138
|
+
eof = false
|
|
139
|
+
closing = nil
|
|
140
|
+
::Thread.handle_interrupt(Object => :never) do
|
|
141
|
+
::Thread.handle_interrupt(Object => :immediate) do
|
|
142
|
+
req, closing = self.class.build_request(request) do
|
|
143
|
+
self.class.calibrate_socket_timeout(conn, deadline)
|
|
144
|
+
end
|
|
149
145
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
rsp.read_body do |bytes|
|
|
156
|
-
y << bytes.force_encoding(Encoding::BINARY)
|
|
157
|
-
break if finished
|
|
146
|
+
self.class.calibrate_socket_timeout(conn, deadline)
|
|
147
|
+
unless conn.started?
|
|
148
|
+
conn.keep_alive_timeout = self.class::KEEP_ALIVE_TIMEOUT
|
|
149
|
+
conn.start
|
|
150
|
+
end
|
|
158
151
|
|
|
159
152
|
self.class.calibrate_socket_timeout(conn, deadline)
|
|
153
|
+
conn.request(req) do |rsp|
|
|
154
|
+
y << [req, rsp]
|
|
155
|
+
break if finished
|
|
156
|
+
|
|
157
|
+
rsp.read_body do |bytes|
|
|
158
|
+
y << bytes.force_encoding(Encoding::BINARY)
|
|
159
|
+
break if finished
|
|
160
|
+
|
|
161
|
+
self.class.calibrate_socket_timeout(conn, deadline)
|
|
162
|
+
end
|
|
163
|
+
eof = true
|
|
164
|
+
end
|
|
165
|
+
end
|
|
166
|
+
ensure
|
|
167
|
+
begin
|
|
168
|
+
conn.finish if !eof && conn&.started?
|
|
169
|
+
ensure
|
|
170
|
+
closing&.call
|
|
160
171
|
end
|
|
161
|
-
eof = true
|
|
162
172
|
end
|
|
163
|
-
ensure
|
|
164
|
-
conn.finish if !eof && conn&.started?
|
|
165
173
|
end
|
|
166
174
|
rescue Timeout::Error
|
|
167
175
|
raise Deeprails::Errors::APITimeoutError.new(url: url, request: req)
|
|
@@ -174,8 +182,6 @@ module Deeprails
|
|
|
174
182
|
body = Deeprails::Internal::Util.fused_enum(enum, external: true) do
|
|
175
183
|
finished = true
|
|
176
184
|
loop { enum.next }
|
|
177
|
-
ensure
|
|
178
|
-
closing&.call
|
|
179
185
|
end
|
|
180
186
|
[Integer(response.code), response, body]
|
|
181
187
|
end
|
data/lib/deeprails/models/defend_create_workflow_params.rb
CHANGED
|
@@ -57,14 +57,14 @@ module Deeprails
|
|
|
57
57
|
# @return [String, nil]
|
|
58
58
|
optional :description, String
|
|
59
59
|
|
|
60
|
-
# @!attribute
|
|
60
|
+
# @!attribute max_improvement_attempts
|
|
61
61
|
# Max. number of improvement action retries until a given event passes the
|
|
62
62
|
# guardrails. Defaults to 10.
|
|
63
63
|
#
|
|
64
64
|
# @return [Integer, nil]
|
|
65
|
-
optional :
|
|
65
|
+
optional :max_improvement_attempts, Integer
|
|
66
66
|
|
|
67
|
-
# @!method initialize(improvement_action:, name:, type:, automatic_hallucination_tolerance_levels: nil, custom_hallucination_threshold_values: nil, description: nil,
|
|
67
|
+
# @!method initialize(improvement_action:, name:, type:, automatic_hallucination_tolerance_levels: nil, custom_hallucination_threshold_values: nil, description: nil, max_improvement_attempts: nil, request_options: {})
|
|
68
68
|
# Some parameter documentations has been truncated, see
|
|
69
69
|
# {Deeprails::Models::DefendCreateWorkflowParams} for more details.
|
|
70
70
|
#
|
|
@@ -80,7 +80,7 @@ module Deeprails
|
|
|
80
80
|
#
|
|
81
81
|
# @param description [String] Description for the workflow.
|
|
82
82
|
#
|
|
83
|
-
# @param
|
|
83
|
+
# @param max_improvement_attempts [Integer] Max. number of improvement action retries until a given event passes the guardra
|
|
84
84
|
#
|
|
85
85
|
# @param request_options [Deeprails::RequestOptions, Hash{Symbol=>Object}]
|
|
86
86
|
|
data/lib/deeprails/models/defend_response.rb
CHANGED
|
@@ -38,12 +38,12 @@ module Deeprails
|
|
|
38
38
|
# @return [Symbol, Deeprails::Models::DefendResponse::ImprovementAction, nil]
|
|
39
39
|
optional :improvement_action, enum: -> { Deeprails::DefendResponse::ImprovementAction }
|
|
40
40
|
|
|
41
|
-
# @!attribute
|
|
41
|
+
# @!attribute max_improvement_attempts
|
|
42
42
|
# Max. number of improvement action retries until a given event passes the
|
|
43
43
|
# guardrails.
|
|
44
44
|
#
|
|
45
45
|
# @return [Integer, nil]
|
|
46
|
-
optional :
|
|
46
|
+
optional :max_improvement_attempts, Integer
|
|
47
47
|
|
|
48
48
|
# @!attribute modified_at
|
|
49
49
|
# The most recent time the workflow was modified in UTC.
|
|
@@ -64,7 +64,7 @@ module Deeprails
|
|
|
64
64
|
# @return [Float, nil]
|
|
65
65
|
optional :success_rate, Float
|
|
66
66
|
|
|
67
|
-
# @!method initialize(name:, workflow_id:, created_at: nil, description: nil, improvement_action: nil,
|
|
67
|
+
# @!method initialize(name:, workflow_id:, created_at: nil, description: nil, improvement_action: nil, max_improvement_attempts: nil, modified_at: nil, status: nil, success_rate: nil)
|
|
68
68
|
# Some parameter documentations has been truncated, see
|
|
69
69
|
# {Deeprails::Models::DefendResponse} for more details.
|
|
70
70
|
#
|
|
@@ -78,7 +78,7 @@ module Deeprails
|
|
|
78
78
|
#
|
|
79
79
|
# @param improvement_action [Symbol, Deeprails::Models::DefendResponse::ImprovementAction] The action used to improve outputs that fail one or more guardrail metrics for t
|
|
80
80
|
#
|
|
81
|
-
# @param
|
|
81
|
+
# @param max_improvement_attempts [Integer] Max. number of improvement action retries until a given event passes the guardra
|
|
82
82
|
#
|
|
83
83
|
# @param modified_at [Time] The most recent time the workflow was modified in UTC.
|
|
84
84
|
#
|
data/lib/deeprails/models/defend_submit_event_params.rb
CHANGED
|
@@ -9,7 +9,7 @@ module Deeprails
|
|
|
9
9
|
|
|
10
10
|
# @!attribute model_input
|
|
11
11
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
12
|
-
# contain at least `user_prompt` or `system_prompt` field. For the
|
|
12
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For the
|
|
13
13
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
14
14
|
#
|
|
15
15
|
# @return [Deeprails::Models::DefendSubmitEventParams::ModelInput]
|
|
@@ -60,7 +60,7 @@ module Deeprails
|
|
|
60
60
|
|
|
61
61
|
class ModelInput < Deeprails::Internal::Type::BaseModel
|
|
62
62
|
# @!attribute ground_truth
|
|
63
|
-
# The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
63
|
+
# The ground truth for evaluating the Ground Truth Adherence guardrail.
|
|
64
64
|
#
|
|
65
65
|
# @return [String, nil]
|
|
66
66
|
optional :ground_truth, String
|
|
@@ -79,10 +79,10 @@ module Deeprails
|
|
|
79
79
|
|
|
80
80
|
# @!method initialize(ground_truth: nil, system_prompt: nil, user_prompt: nil)
|
|
81
81
|
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
82
|
-
# contain at least `user_prompt` or `system_prompt` field. For the
|
|
82
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For the
|
|
83
83
|
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
84
84
|
#
|
|
85
|
-
# @param ground_truth [String] The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
85
|
+
# @param ground_truth [String] The ground truth for evaluating the Ground Truth Adherence guardrail.
|
|
86
86
|
#
|
|
87
87
|
# @param system_prompt [String] The system prompt used to generate the output.
|
|
88
88
|
#
|
data/lib/deeprails/models/monitor_detail_response.rb
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Deeprails
|
|
4
|
+
module Models
|
|
5
|
+
# @see Deeprails::Resources::Monitor#retrieve
|
|
6
|
+
class MonitorDetailResponse < Deeprails::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute monitor_id
|
|
8
|
+
# A unique monitor ID.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
required :monitor_id, String
|
|
12
|
+
|
|
13
|
+
# @!attribute monitor_status
|
|
14
|
+
# Status of the monitor. Can be `active` or `inactive`. Inactive monitors no
|
|
15
|
+
# longer record and evaluate events.
|
|
16
|
+
#
|
|
17
|
+
# @return [Symbol, Deeprails::Models::MonitorDetailResponse::MonitorStatus]
|
|
18
|
+
required :monitor_status, enum: -> { Deeprails::MonitorDetailResponse::MonitorStatus }
|
|
19
|
+
|
|
20
|
+
# @!attribute name
|
|
21
|
+
# Name of this monitor.
|
|
22
|
+
#
|
|
23
|
+
# @return [String]
|
|
24
|
+
required :name, String
|
|
25
|
+
|
|
26
|
+
# @!attribute created_at
|
|
27
|
+
# The time the monitor was created in UTC.
|
|
28
|
+
#
|
|
29
|
+
# @return [Time, nil]
|
|
30
|
+
optional :created_at, Time
|
|
31
|
+
|
|
32
|
+
# @!attribute description
|
|
33
|
+
# Description of this monitor.
|
|
34
|
+
#
|
|
35
|
+
# @return [String, nil]
|
|
36
|
+
optional :description, String
|
|
37
|
+
|
|
38
|
+
# @!attribute evaluations
|
|
39
|
+
# An array of all evaluations performed by this monitor. Each one corresponds to a
|
|
40
|
+
# separate monitor event.
|
|
41
|
+
#
|
|
42
|
+
# @return [Array<Deeprails::Models::MonitorDetailResponse::Evaluation>, nil]
|
|
43
|
+
optional :evaluations,
|
|
44
|
+
-> { Deeprails::Internal::Type::ArrayOf[Deeprails::MonitorDetailResponse::Evaluation] }
|
|
45
|
+
|
|
46
|
+
# @!attribute stats
|
|
47
|
+
# Contains five fields used for stats of this monitor: total evaluations,
|
|
48
|
+
# completed evaluations, failed evaluations, queued evaluations, and in progress
|
|
49
|
+
# evaluations.
|
|
50
|
+
#
|
|
51
|
+
# @return [Deeprails::Models::MonitorDetailResponse::Stats, nil]
|
|
52
|
+
optional :stats, -> { Deeprails::MonitorDetailResponse::Stats }
|
|
53
|
+
|
|
54
|
+
# @!attribute updated_at
|
|
55
|
+
# The most recent time the monitor was modified in UTC.
|
|
56
|
+
#
|
|
57
|
+
# @return [Time, nil]
|
|
58
|
+
optional :updated_at, Time
|
|
59
|
+
|
|
60
|
+
# @!attribute user_id
|
|
61
|
+
# User ID of the user who created the monitor.
|
|
62
|
+
#
|
|
63
|
+
# @return [String, nil]
|
|
64
|
+
optional :user_id, String
|
|
65
|
+
|
|
66
|
+
# @!method initialize(monitor_id:, monitor_status:, name:, created_at: nil, description: nil, evaluations: nil, stats: nil, updated_at: nil, user_id: nil)
|
|
67
|
+
# Some parameter documentations has been truncated, see
|
|
68
|
+
# {Deeprails::Models::MonitorDetailResponse} for more details.
|
|
69
|
+
#
|
|
70
|
+
# @param monitor_id [String] A unique monitor ID.
|
|
71
|
+
#
|
|
72
|
+
# @param monitor_status [Symbol, Deeprails::Models::MonitorDetailResponse::MonitorStatus] Status of the monitor. Can be `active` or `inactive`. Inactive monitors no lon
|
|
73
|
+
#
|
|
74
|
+
# @param name [String] Name of this monitor.
|
|
75
|
+
#
|
|
76
|
+
# @param created_at [Time] The time the monitor was created in UTC.
|
|
77
|
+
#
|
|
78
|
+
# @param description [String] Description of this monitor.
|
|
79
|
+
#
|
|
80
|
+
# @param evaluations [Array<Deeprails::Models::MonitorDetailResponse::Evaluation>] An array of all evaluations performed by this monitor. Each one corresponds to
|
|
81
|
+
#
|
|
82
|
+
# @param stats [Deeprails::Models::MonitorDetailResponse::Stats] Contains five fields used for stats of this monitor: total evaluations, complete
|
|
83
|
+
#
|
|
84
|
+
# @param updated_at [Time] The most recent time the monitor was modified in UTC.
|
|
85
|
+
#
|
|
86
|
+
# @param user_id [String] User ID of the user who created the monitor.
|
|
87
|
+
|
|
88
|
+
# Status of the monitor. Can be `active` or `inactive`. Inactive monitors no
|
|
89
|
+
# longer record and evaluate events.
|
|
90
|
+
#
|
|
91
|
+
# @see Deeprails::Models::MonitorDetailResponse#monitor_status
|
|
92
|
+
module MonitorStatus
|
|
93
|
+
extend Deeprails::Internal::Type::Enum
|
|
94
|
+
|
|
95
|
+
ACTIVE = :active
|
|
96
|
+
INACTIVE = :inactive
|
|
97
|
+
|
|
98
|
+
# @!method self.values
|
|
99
|
+
# @return [Array<Symbol>]
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
class Evaluation < Deeprails::Internal::Type::BaseModel
|
|
103
|
+
# @!attribute eval_id
|
|
104
|
+
# A unique evaluation ID.
|
|
105
|
+
#
|
|
106
|
+
# @return [String]
|
|
107
|
+
required :eval_id, String
|
|
108
|
+
|
|
109
|
+
# @!attribute evaluation_status
|
|
110
|
+
# Status of the evaluation.
|
|
111
|
+
#
|
|
112
|
+
# @return [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::EvaluationStatus]
|
|
113
|
+
required :evaluation_status, enum: -> { Deeprails::MonitorDetailResponse::Evaluation::EvaluationStatus }
|
|
114
|
+
|
|
115
|
+
# @!attribute model_input
|
|
116
|
+
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
117
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
118
|
+
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
119
|
+
#
|
|
120
|
+
# @return [Deeprails::Models::MonitorDetailResponse::Evaluation::ModelInput]
|
|
121
|
+
required :model_input, -> { Deeprails::MonitorDetailResponse::Evaluation::ModelInput }
|
|
122
|
+
|
|
123
|
+
# @!attribute model_output
|
|
124
|
+
# Output generated by the LLM to be evaluated.
|
|
125
|
+
#
|
|
126
|
+
# @return [String]
|
|
127
|
+
required :model_output, String
|
|
128
|
+
|
|
129
|
+
# @!attribute run_mode
|
|
130
|
+
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
131
|
+
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
132
|
+
#
|
|
133
|
+
# @return [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::RunMode]
|
|
134
|
+
required :run_mode, enum: -> { Deeprails::MonitorDetailResponse::Evaluation::RunMode }
|
|
135
|
+
|
|
136
|
+
# @!attribute created_at
|
|
137
|
+
# The time the evaluation was created in UTC.
|
|
138
|
+
#
|
|
139
|
+
# @return [Time, nil]
|
|
140
|
+
optional :created_at, Time
|
|
141
|
+
|
|
142
|
+
# @!attribute end_timestamp
|
|
143
|
+
# The time the evaluation completed in UTC.
|
|
144
|
+
#
|
|
145
|
+
# @return [Time, nil]
|
|
146
|
+
optional :end_timestamp, Time
|
|
147
|
+
|
|
148
|
+
# @!attribute error_message
|
|
149
|
+
# Description of the error causing the evaluation to fail, if any.
|
|
150
|
+
#
|
|
151
|
+
# @return [String, nil]
|
|
152
|
+
optional :error_message, String
|
|
153
|
+
|
|
154
|
+
# @!attribute error_timestamp
|
|
155
|
+
# The time the error causing the evaluation to fail was recorded.
|
|
156
|
+
#
|
|
157
|
+
# @return [Time, nil]
|
|
158
|
+
optional :error_timestamp, Time
|
|
159
|
+
|
|
160
|
+
# @!attribute evaluation_result
|
|
161
|
+
# Evaluation result consisting of average scores and rationales for each of the
|
|
162
|
+
# evaluated guardrail metrics.
|
|
163
|
+
#
|
|
164
|
+
# @return [Hash{Symbol=>Object}, nil]
|
|
165
|
+
optional :evaluation_result, Deeprails::Internal::Type::HashOf[Deeprails::Internal::Type::Unknown]
|
|
166
|
+
|
|
167
|
+
# @!attribute evaluation_total_cost
|
|
168
|
+
# Total cost of the evaluation.
|
|
169
|
+
#
|
|
170
|
+
# @return [Float, nil]
|
|
171
|
+
optional :evaluation_total_cost, Float
|
|
172
|
+
|
|
173
|
+
# @!attribute guardrail_metrics
|
|
174
|
+
# An array of guardrail metrics that the model input and output pair will be
|
|
175
|
+
# evaluated on.
|
|
176
|
+
#
|
|
177
|
+
# @return [Array<Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::GuardrailMetric>, nil]
|
|
178
|
+
optional :guardrail_metrics,
|
|
179
|
+
-> { Deeprails::Internal::Type::ArrayOf[enum: Deeprails::MonitorDetailResponse::Evaluation::GuardrailMetric] }
|
|
180
|
+
|
|
181
|
+
# @!attribute model_used
|
|
182
|
+
# Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
183
|
+
#
|
|
184
|
+
# @return [String, nil]
|
|
185
|
+
optional :model_used, String
|
|
186
|
+
|
|
187
|
+
# @!attribute modified_at
|
|
188
|
+
# The most recent time the evaluation was modified in UTC.
|
|
189
|
+
#
|
|
190
|
+
# @return [Time, nil]
|
|
191
|
+
optional :modified_at, Time
|
|
192
|
+
|
|
193
|
+
# @!attribute nametag
|
|
194
|
+
# An optional, user-defined tag for the evaluation.
|
|
195
|
+
#
|
|
196
|
+
# @return [String, nil]
|
|
197
|
+
optional :nametag, String
|
|
198
|
+
|
|
199
|
+
# @!attribute progress
|
|
200
|
+
# Evaluation progress. Values range between 0 and 100; 100 corresponds to a
|
|
201
|
+
# completed `evaluation_status`.
|
|
202
|
+
#
|
|
203
|
+
# @return [Integer, nil]
|
|
204
|
+
optional :progress, Integer
|
|
205
|
+
|
|
206
|
+
# @!attribute start_timestamp
|
|
207
|
+
# The time the evaluation started in UTC.
|
|
208
|
+
#
|
|
209
|
+
# @return [Time, nil]
|
|
210
|
+
optional :start_timestamp, Time
|
|
211
|
+
|
|
212
|
+
# @!method initialize(eval_id:, evaluation_status:, model_input:, model_output:, run_mode:, created_at: nil, end_timestamp: nil, error_message: nil, error_timestamp: nil, evaluation_result: nil, evaluation_total_cost: nil, guardrail_metrics: nil, model_used: nil, modified_at: nil, nametag: nil, progress: nil, start_timestamp: nil)
|
|
213
|
+
# Some parameter documentations has been truncated, see
|
|
214
|
+
# {Deeprails::Models::MonitorDetailResponse::Evaluation} for more details.
|
|
215
|
+
#
|
|
216
|
+
# @param eval_id [String] A unique evaluation ID.
|
|
217
|
+
#
|
|
218
|
+
# @param evaluation_status [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::EvaluationStatus] Status of the evaluation.
|
|
219
|
+
#
|
|
220
|
+
# @param model_input [Deeprails::Models::MonitorDetailResponse::Evaluation::ModelInput] A dictionary of inputs sent to the LLM to generate output. The dictionary must c
|
|
221
|
+
#
|
|
222
|
+
# @param model_output [String] Output generated by the LLM to be evaluated.
|
|
223
|
+
#
|
|
224
|
+
# @param run_mode [Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::RunMode] Run mode for the evaluation. The run mode allows the user to optimize for speed
|
|
225
|
+
#
|
|
226
|
+
# @param created_at [Time] The time the evaluation was created in UTC.
|
|
227
|
+
#
|
|
228
|
+
# @param end_timestamp [Time] The time the evaluation completed in UTC.
|
|
229
|
+
#
|
|
230
|
+
# @param error_message [String] Description of the error causing the evaluation to fail, if any.
|
|
231
|
+
#
|
|
232
|
+
# @param error_timestamp [Time] The time the error causing the evaluation to fail was recorded.
|
|
233
|
+
#
|
|
234
|
+
# @param evaluation_result [Hash{Symbol=>Object}] Evaluation result consisting of average scores and rationales for each of the ev
|
|
235
|
+
#
|
|
236
|
+
# @param evaluation_total_cost [Float] Total cost of the evaluation.
|
|
237
|
+
#
|
|
238
|
+
# @param guardrail_metrics [Array<Symbol, Deeprails::Models::MonitorDetailResponse::Evaluation::GuardrailMetric>] An array of guardrail metrics that the model input and output pair will be evalu
|
|
239
|
+
#
|
|
240
|
+
# @param model_used [String] Model ID used to generate the output, like `gpt-4o` or `o3`.
|
|
241
|
+
#
|
|
242
|
+
# @param modified_at [Time] The most recent time the evaluation was modified in UTC.
|
|
243
|
+
#
|
|
244
|
+
# @param nametag [String] An optional, user-defined tag for the evaluation.
|
|
245
|
+
#
|
|
246
|
+
# @param progress [Integer] Evaluation progress. Values range between 0 and 100; 100 corresponds to a compl
|
|
247
|
+
#
|
|
248
|
+
# @param start_timestamp [Time] The time the evaluation started in UTC.
|
|
249
|
+
|
|
250
|
+
# Status of the evaluation.
|
|
251
|
+
#
|
|
252
|
+
# @see Deeprails::Models::MonitorDetailResponse::Evaluation#evaluation_status
|
|
253
|
+
module EvaluationStatus
|
|
254
|
+
extend Deeprails::Internal::Type::Enum
|
|
255
|
+
|
|
256
|
+
IN_PROGRESS = :in_progress
|
|
257
|
+
COMPLETED = :completed
|
|
258
|
+
CANCELED = :canceled
|
|
259
|
+
QUEUED = :queued
|
|
260
|
+
FAILED = :failed
|
|
261
|
+
|
|
262
|
+
# @!method self.values
|
|
263
|
+
# @return [Array<Symbol>]
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
# @see Deeprails::Models::MonitorDetailResponse::Evaluation#model_input
|
|
267
|
+
class ModelInput < Deeprails::Internal::Type::BaseModel
|
|
268
|
+
# @!attribute ground_truth
|
|
269
|
+
# The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
270
|
+
#
|
|
271
|
+
# @return [String, nil]
|
|
272
|
+
optional :ground_truth, String
|
|
273
|
+
|
|
274
|
+
# @!attribute system_prompt
|
|
275
|
+
# The system prompt used to generate the output.
|
|
276
|
+
#
|
|
277
|
+
# @return [String, nil]
|
|
278
|
+
optional :system_prompt, String
|
|
279
|
+
|
|
280
|
+
# @!attribute user_prompt
|
|
281
|
+
# The user prompt used to generate the output.
|
|
282
|
+
#
|
|
283
|
+
# @return [String, nil]
|
|
284
|
+
optional :user_prompt, String
|
|
285
|
+
|
|
286
|
+
# @!method initialize(ground_truth: nil, system_prompt: nil, user_prompt: nil)
|
|
287
|
+
# A dictionary of inputs sent to the LLM to generate output. The dictionary must
|
|
288
|
+
# contain at least a `user_prompt` field or a `system_prompt` field. For
|
|
289
|
+
# ground_truth_adherence guardrail metric, `ground_truth` should be provided.
|
|
290
|
+
#
|
|
291
|
+
# @param ground_truth [String] The ground truth for evaluating Ground Truth Adherence guardrail.
|
|
292
|
+
#
|
|
293
|
+
# @param system_prompt [String] The system prompt used to generate the output.
|
|
294
|
+
#
|
|
295
|
+
# @param user_prompt [String] The user prompt used to generate the output.
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
# Run mode for the evaluation. The run mode allows the user to optimize for speed,
|
|
299
|
+
# accuracy, and cost by determining which models are used to evaluate the event.
|
|
300
|
+
#
|
|
301
|
+
# @see Deeprails::Models::MonitorDetailResponse::Evaluation#run_mode
|
|
302
|
+
module RunMode
|
|
303
|
+
extend Deeprails::Internal::Type::Enum
|
|
304
|
+
|
|
305
|
+
PRECISION_PLUS = :precision_plus
|
|
306
|
+
PRECISION = :precision
|
|
307
|
+
SMART = :smart
|
|
308
|
+
ECONOMY = :economy
|
|
309
|
+
|
|
310
|
+
# @!method self.values
|
|
311
|
+
# @return [Array<Symbol>]
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
module GuardrailMetric
|
|
315
|
+
extend Deeprails::Internal::Type::Enum
|
|
316
|
+
|
|
317
|
+
CORRECTNESS = :correctness
|
|
318
|
+
COMPLETENESS = :completeness
|
|
319
|
+
INSTRUCTION_ADHERENCE = :instruction_adherence
|
|
320
|
+
CONTEXT_ADHERENCE = :context_adherence
|
|
321
|
+
GROUND_TRUTH_ADHERENCE = :ground_truth_adherence
|
|
322
|
+
COMPREHENSIVE_SAFETY = :comprehensive_safety
|
|
323
|
+
|
|
324
|
+
# @!method self.values
|
|
325
|
+
# @return [Array<Symbol>]
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
|
|
329
|
+
# @see Deeprails::Models::MonitorDetailResponse#stats
|
|
330
|
+
class Stats < Deeprails::Internal::Type::BaseModel
|
|
331
|
+
# @!attribute completed_evaluations
|
|
332
|
+
# Number of evaluations that completed successfully.
|
|
333
|
+
#
|
|
334
|
+
# @return [Integer, nil]
|
|
335
|
+
optional :completed_evaluations, Integer
|
|
336
|
+
|
|
337
|
+
# @!attribute failed_evaluations
|
|
338
|
+
# Number of evaluations that failed.
|
|
339
|
+
#
|
|
340
|
+
# @return [Integer, nil]
|
|
341
|
+
optional :failed_evaluations, Integer
|
|
342
|
+
|
|
343
|
+
# @!attribute in_progress_evaluations
|
|
344
|
+
# Number of evaluations currently in progress.
|
|
345
|
+
#
|
|
346
|
+
# @return [Integer, nil]
|
|
347
|
+
optional :in_progress_evaluations, Integer
|
|
348
|
+
|
|
349
|
+
# @!attribute queued_evaluations
|
|
350
|
+
# Number of evaluations currently queued.
|
|
351
|
+
#
|
|
352
|
+
# @return [Integer, nil]
|
|
353
|
+
optional :queued_evaluations, Integer
|
|
354
|
+
|
|
355
|
+
# @!attribute total_evaluations
|
|
356
|
+
# Total number of evaluations performed by this monitor.
|
|
357
|
+
#
|
|
358
|
+
# @return [Integer, nil]
|
|
359
|
+
optional :total_evaluations, Integer
|
|
360
|
+
|
|
361
|
+
# @!method initialize(completed_evaluations: nil, failed_evaluations: nil, in_progress_evaluations: nil, queued_evaluations: nil, total_evaluations: nil)
|
|
362
|
+
# Contains five fields used for stats of this monitor: total evaluations,
|
|
363
|
+
# completed evaluations, failed evaluations, queued evaluations, and in progress
|
|
364
|
+
# evaluations.
|
|
365
|
+
#
|
|
366
|
+
# @param completed_evaluations [Integer] Number of evaluations that completed successfully.
|
|
367
|
+
#
|
|
368
|
+
# @param failed_evaluations [Integer] Number of evaluations that failed.
|
|
369
|
+
#
|
|
370
|
+
# @param in_progress_evaluations [Integer] Number of evaluations currently in progress.
|
|
371
|
+
#
|
|
372
|
+
# @param queued_evaluations [Integer] Number of evaluations currently queued.
|
|
373
|
+
#
|
|
374
|
+
# @param total_evaluations [Integer] Total number of evaluations performed by this monitor.
|
|
375
|
+
end
|
|
376
|
+
end
|
|
377
|
+
end
|
|
378
|
+
end
|
data/lib/deeprails/models/monitor_event_response.rb
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Deeprails
|
|
4
|
+
module Models
|
|
5
|
+
# @see Deeprails::Resources::Monitor#submit_event
|
|
6
|
+
class MonitorEventResponse < Deeprails::Internal::Type::BaseModel
|
|
7
|
+
# @!attribute evaluation_id
|
|
8
|
+
# A unique evaluation ID associated with this event.
|
|
9
|
+
#
|
|
10
|
+
# @return [String]
|
|
11
|
+
required :evaluation_id, String
|
|
12
|
+
|
|
13
|
+
# @!attribute event_id
|
|
14
|
+
# A unique monitor event ID.
|
|
15
|
+
#
|
|
16
|
+
# @return [String]
|
|
17
|
+
required :event_id, String
|
|
18
|
+
|
|
19
|
+
# @!attribute monitor_id
|
|
20
|
+
# Monitor ID associated with this event.
|
|
21
|
+
#
|
|
22
|
+
# @return [String]
|
|
23
|
+
required :monitor_id, String
|
|
24
|
+
|
|
25
|
+
# @!attribute created_at
|
|
26
|
+
# The time the monitor event was created in UTC.
|
|
27
|
+
#
|
|
28
|
+
# @return [Time, nil]
|
|
29
|
+
optional :created_at, Time
|
|
30
|
+
|
|
31
|
+
# @!method initialize(evaluation_id:, event_id:, monitor_id:, created_at: nil)
|
|
32
|
+
# @param evaluation_id [String] A unique evaluation ID associated with this event.
|
|
33
|
+
#
|
|
34
|
+
# @param event_id [String] A unique monitor event ID.
|
|
35
|
+
#
|
|
36
|
+
# @param monitor_id [String] Monitor ID associated with this event.
|
|
37
|
+
#
|
|
38
|
+
# @param created_at [Time] The time the monitor event was created in UTC.
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|