rubyllm-observ 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -6
- data/app/assets/stylesheets/observ/_annotations.scss +114 -103
- data/app/assets/stylesheets/observ/_card.scss +58 -49
- data/app/assets/stylesheets/observ/_chat.scss +247 -155
- data/app/assets/stylesheets/observ/_components.scss +622 -340
- data/app/assets/stylesheets/observ/_dashboard.scss +31 -28
- data/app/assets/stylesheets/observ/_datasets.scss +494 -547
- data/app/assets/stylesheets/observ/_drawer.scss +250 -228
- data/app/assets/stylesheets/observ/_filters.scss +139 -0
- data/app/assets/stylesheets/observ/_json_viewer.scss +103 -97
- data/app/assets/stylesheets/observ/_layout.scss +443 -178
- data/app/assets/stylesheets/observ/_metrics.scss +79 -76
- data/app/assets/stylesheets/observ/_namespace.scss +18 -0
- data/app/assets/stylesheets/observ/_observations.scss +122 -119
- data/app/assets/stylesheets/observ/_pagination.scss +129 -112
- data/app/assets/stylesheets/observ/_prompts.scss +485 -269
- data/app/assets/stylesheets/observ/_reset.scss +249 -0
- data/app/assets/stylesheets/observ/_table.scss +46 -38
- data/app/assets/stylesheets/observ/_variables.scss +54 -0
- data/app/assets/stylesheets/observ/application.scss +3 -0
- data/app/controllers/observ/dataset_run_items_controller.rb +0 -1
- data/app/controllers/observ/review_queue_controller.rb +154 -0
- data/app/controllers/observ/scores_controller.rb +64 -0
- data/app/controllers/observ/sessions_controller.rb +23 -0
- data/app/helpers/observ/application_helper.rb +1 -0
- data/app/helpers/observ/reviews_helper.rb +33 -0
- data/app/models/concerns/observ/json_queryable.rb +138 -0
- data/app/models/concerns/observ/reviewable.rb +41 -0
- data/app/models/concerns/observ/scoreable.rb +34 -0
- data/app/models/observ/dataset_run_item.rb +3 -13
- data/app/models/observ/review_item.rb +48 -0
- data/app/models/observ/score.rb +38 -6
- data/app/models/observ/session.rb +5 -1
- data/app/models/observ/trace.rb +3 -0
- data/app/services/observ/evaluators/base_evaluator.rb +0 -1
- data/app/services/observ/guardrail_service.rb +128 -0
- data/app/views/kaminari/_first_page.html.erb +1 -1
- data/app/views/kaminari/_gap.html.erb +1 -1
- data/app/views/kaminari/_last_page.html.erb +1 -1
- data/app/views/kaminari/_next_page.html.erb +1 -1
- data/app/views/kaminari/_page.html.erb +1 -1
- data/app/views/kaminari/_paginator.html.erb +1 -1
- data/app/views/kaminari/_prev_page.html.erb +1 -1
- data/app/views/kaminari/observ/_first_page.html.erb +1 -1
- data/app/views/kaminari/observ/_gap.html.erb +1 -1
- data/app/views/kaminari/observ/_last_page.html.erb +1 -1
- data/app/views/kaminari/observ/_next_page.html.erb +1 -1
- data/app/views/kaminari/observ/_page.html.erb +1 -1
- data/app/views/kaminari/observ/_paginator.html.erb +1 -1
- data/app/views/kaminari/observ/_prev_page.html.erb +1 -1
- data/app/views/layouts/observ/application.html.erb +96 -58
- data/app/views/observ/annotations/_form.html.erb +5 -5
- data/app/views/observ/annotations/index.html.erb +4 -4
- data/app/views/observ/annotations/sessions_index.html.erb +9 -9
- data/app/views/observ/annotations/traces_index.html.erb +9 -9
- data/app/views/observ/chats/_form.html.erb +7 -7
- data/app/views/observ/datasets/index.html.erb +6 -6
- data/app/views/observ/messages/_form.html.erb +11 -12
- data/app/views/observ/observations/index.html.erb +3 -4
- data/app/views/observ/prompts/_form.html.erb +37 -38
- data/app/views/observ/prompts/_new_form.html.erb +37 -38
- data/app/views/observ/prompts/compare.html.erb +59 -55
- data/app/views/observ/prompts/edit.html.erb +3 -3
- data/app/views/observ/prompts/index.html.erb +9 -9
- data/app/views/observ/prompts/new.html.erb +3 -3
- data/app/views/observ/prompts/show.html.erb +2 -2
- data/app/views/observ/prompts/versions.html.erb +22 -22
- data/app/views/observ/review_queue/_item.html.erb +39 -0
- data/app/views/observ/review_queue/_stats.html.erb +18 -0
- data/app/views/observ/review_queue/index.html.erb +49 -0
- data/app/views/observ/review_queue/show.html.erb +76 -0
- data/app/views/observ/review_queue/stats.html.erb +100 -0
- data/app/views/observ/scores/_form.html.erb +39 -0
- data/app/views/observ/scores/create.turbo_stream.erb +10 -0
- data/app/views/observ/sessions/_chat.html.erb +59 -0
- data/app/views/observ/sessions/_metadata.html.erb +17 -0
- data/app/views/observ/sessions/_metrics.html.erb +81 -0
- data/app/views/observ/sessions/_traces.html.erb +92 -0
- data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +8 -1
- data/app/views/observ/sessions/index.html.erb +60 -4
- data/app/views/observ/sessions/show.html.erb +4 -217
- data/app/views/observ/traces/_details.html.erb +47 -0
- data/app/views/observ/traces/_input.html.erb +10 -0
- data/app/views/observ/traces/_metadata.html.erb +10 -0
- data/app/views/observ/traces/_observations.html.erb +172 -0
- data/app/views/observ/traces/_output.html.erb +10 -0
- data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +8 -1
- data/app/views/observ/traces/index.html.erb +3 -4
- data/app/views/observ/traces/show.html.erb +5 -232
- data/config/routes.rb +14 -0
- data/db/migrate/015_refactor_scores_to_polymorphic.rb +27 -0
- data/db/migrate/016_create_observ_review_items.rb +25 -0
- data/lib/observ/version.rb +1 -1
- data/lib/rubyllm-observ.rb +1 -0
- metadata +31 -1
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # View helpers translating review-queue attributes into badge modifier
  # CSS classes.
  module ReviewsHelper
    # Badge class per priority name; unlisted priorities use the fallback
    # passed to `fetch` in #priority_badge_class.
    PRIORITY_BADGE_CLASSES = {
      "critical" => "observ-badge--danger",
      "high" => "observ-badge--warning"
    }.freeze

    # Badge class per review status name; unlisted statuses use the fallback
    # passed to `fetch` in #review_status_badge_class.
    REVIEW_STATUS_BADGE_CLASSES = {
      "pending" => "observ-badge--default",
      "in_progress" => "observ-badge--info",
      "completed" => "observ-badge--success",
      "skipped" => "observ-badge--secondary"
    }.freeze

    # Returns the badge class for a priority level.
    #
    # @param priority [#to_s] priority name (string, symbol or nil)
    # @return [String] "observ-badge--secondary" unless critical/high
    def priority_badge_class(priority)
      PRIORITY_BADGE_CLASSES.fetch(priority.to_s, "observ-badge--secondary")
    end

    # Returns the badge class for a review status.
    #
    # @param status [#to_s] status name (string, symbol or nil)
    # @return [String] "observ-badge--default" for unknown statuses
    def review_status_badge_class(status)
      REVIEW_STATUS_BADGE_CLASSES.fetch(status.to_s, "observ-badge--default")
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
# Provides database-agnostic JSON querying capabilities.
|
|
5
|
+
#
|
|
6
|
+
# This concern allows models with JSON/JSONB columns to query nested
|
|
7
|
+
# JSON fields in a way that works across different database adapters
|
|
8
|
+
# (PostgreSQL, SQLite, MySQL, etc.)
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# class Session < ApplicationRecord
|
|
12
|
+
# include Observ::JsonQueryable
|
|
13
|
+
# end
|
|
14
|
+
#
|
|
15
|
+
# # Query by JSON field
|
|
16
|
+
# Session.where_json(:metadata, :agent_type, "MyAgent")
|
|
17
|
+
# Session.where_json(:metadata, "nested.path", "value")
|
|
18
|
+
#
|
|
19
|
+
module JsonQueryable
|
|
20
|
+
extend ActiveSupport::Concern
|
|
21
|
+
|
|
22
|
+
class_methods do
|
|
23
|
+
# Query records where a JSON column's nested field equals a value.
#
# A nil value is matched with IS NULL, because `field = NULL` is never
# true in SQL and would silently return no rows.
#
# @param column [Symbol] The JSON column name
# @param path [String, Symbol] The JSON path (e.g., "agent_type" or "nested.path")
# @param value [String, Integer, Boolean, nil] The value to match
# @return [ActiveRecord::Relation]
#
# @example Simple path
#   Session.where_json(:metadata, :agent_type, "MyAgent")
#
# @example Nested path
#   Session.where_json(:metadata, "config.mode", "production")
#
def where_json(column, path, value)
  json_query = JsonQuery.new(connection, table_name, column, path)
  return where("#{json_query.extract_sql} IS NULL") if value.nil?

  where(json_query.to_sql, value)
end
|
|
40
|
+
|
|
41
|
+
# Restrict to records whose nested JSON field extracts to a non-NULL value.
#
# @param column [Symbol] The JSON column name
# @param path [String, Symbol] The JSON path
# @return [ActiveRecord::Relation]
#
def where_json_present(column, path)
  extraction = JsonQuery.new(connection, table_name, column, path).extract_sql
  where("#{extraction} IS NOT NULL")
end
|
|
51
|
+
|
|
52
|
+
# Pluck the extracted value of a nested JSON field for each record.
#
# @param column [Symbol] The JSON column name
# @param path [String, Symbol] The JSON path
# @return [Array]
#
def pluck_json(column, path)
  extraction = JsonQuery.new(connection, table_name, column, path).extract_sql
  # The fragment is generated SQL, so it is wrapped with Arel.sql for pluck.
  pluck(Arel.sql(extraction))
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Internal class that generates database-specific SQL for JSON queries.
#
# Table and column names are quoted as identifiers, and every JSON key or
# path is emitted through `connection.quote` as a SQL string literal, so a
# key containing an apostrophe cannot terminate the literal early.
class JsonQuery
  attr_reader :connection, :table_name, :column, :path

  # @param connection [Object] adapter connection; must respond to
  #   adapter_name, quote, quote_column_name and quote_table_name
  # @param table_name [String] table owning the JSON column
  # @param column [String, Symbol] JSON column name
  # @param path [String, Symbol] dot-separated key path, e.g. "config.mode"
  def initialize(connection, table_name, column, path)
    @connection = connection
    @table_name = table_name
    @column = column.to_s
    @path = path.to_s
  end

  # Returns SQL fragment for WHERE clause comparison (with placeholder)
  def to_sql
    "#{extract_sql} = ?"
  end

  # Returns SQL fragment for extracting the JSON value
  def extract_sql
    case adapter_name
    when /postgresql/i
      postgresql_extract
    when /sqlite/i
      sqlite_extract
    when /mysql|mariadb|trilogy/i
      # Trilogy is a MySQL client whose adapter reports itself as "Trilogy".
      mysql_extract
    else
      # Fallback for unknown adapters - try PostgreSQL syntax
      postgresql_extract
    end
  end

  private

  def adapter_name
    connection.adapter_name
  end

  def quoted_column
    "#{quoted_table}.#{connection.quote_column_name(column)}"
  end

  def quoted_table
    connection.quote_table_name(table_name)
  end

  # JSON path shared by the SQLite and MySQL extractors: $.key or $.nested.path
  def json_path
    "$.#{path}"
  end

  # PostgreSQL: -> walks intermediate keys, ->> extracts the final key as text.
  # "a.b.c" becomes col->'a'->'b'->>'c'; a single key becomes col->>'a'.
  def postgresql_extract
    *parents, leaf = path.split(".")
    traversal = parents.map { |key| "->#{connection.quote(key)}" }.join
    "#{quoted_column}#{traversal}->>#{connection.quote(leaf.to_s)}"
  end

  # SQLite: Uses json_extract function
  def sqlite_extract
    "json_extract(#{quoted_column}, #{connection.quote(json_path)})"
  end

  # MySQL/MariaDB: JSON_EXTRACT wrapped in JSON_UNQUOTE
  def mysql_extract
    "JSON_UNQUOTE(JSON_EXTRACT(#{quoted_column}, #{connection.quote(json_path)}))"
  end
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Mixin for records that can be placed on the review queue through a
  # polymorphic has_one Observ::ReviewItem.
  module Reviewable
    extend ActiveSupport::Concern

    included do
      has_one :review_item, as: :reviewable, class_name: "Observ::ReviewItem", dependent: :destroy
    end

    # Queue this record for review. When a review item already exists it is
    # returned untouched; otherwise a pending one is created.
    def enqueue_for_review!(reason:, priority: :normal, details: {})
      existing = review_item
      return existing if existing

      create_review_item!(
        status: :pending,
        priority: priority,
        reason: reason.to_s,
        reason_details: details
      )
    end

    # Status of the attached review item, or 'not_queued' when there is none.
    def review_status
      current = review_item
      return "not_queued" if current.nil?

      current.status || "not_queued"
    end

    # Truthy once the attached review item is completed (nil when not queued).
    def reviewed?
      current = review_item
      current.completed? if current
    end

    # Truthy while the attached review item is pending or in progress
    # (nil when not queued).
    def pending_review?
      current = review_item
      return if current.nil?

      current.pending? || current.in_progress?
    end

    # Whether a review item exists for this record.
    def in_review_queue?
      !review_item.nil?
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Mixin for records that own polymorphic Observ::Score rows.
  module Scoreable
    extend ActiveSupport::Concern

    included do
      has_many :scores, as: :scoreable, class_name: "Observ::Score", dependent: :destroy
    end

    # Look up a score by name, narrowed by source when one is given.
    # With several matches, the one with the latest created_at wins.
    def score_for(name, source: nil)
      conditions = { name: name }
      conditions[:source] = source if source

      scores.where(conditions).order(created_at: :desc).first
    end

    # Whether at least one score row exists for this record.
    def scored?
      scores.exists?
    end

    # The score named "manual" whose source is manual.
    def manual_score
      score_for("manual", source: :manual)
    end

    # Hash of score name => average value, rounded to four decimals.
    def score_summary
      averages = scores.group(:name).average(:value)
      averages.transform_values { |average| average.round(4) }
    end
  end
end
|
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
module Observ
|
|
4
4
|
class DatasetRunItem < ApplicationRecord
|
|
5
|
+
include Observ::Scoreable
|
|
6
|
+
|
|
5
7
|
self.table_name = "observ_dataset_run_items"
|
|
6
8
|
|
|
7
9
|
belongs_to :dataset_run, class_name: "Observ::DatasetRun", inverse_of: :run_items
|
|
8
10
|
belongs_to :dataset_item, class_name: "Observ::DatasetItem", inverse_of: :run_items
|
|
9
11
|
belongs_to :trace, class_name: "Observ::Trace", optional: true
|
|
10
12
|
belongs_to :observation, class_name: "Observ::Observation", optional: true
|
|
11
|
-
has_many :scores, class_name: "Observ::Score",
|
|
12
|
-
foreign_key: :dataset_run_item_id, dependent: :destroy, inverse_of: :dataset_run_item
|
|
13
13
|
|
|
14
14
|
validates :dataset_run_id, uniqueness: { scope: :dataset_item_id }
|
|
15
15
|
|
|
@@ -70,17 +70,7 @@ module Observ
|
|
|
70
70
|
trace&.duration_ms
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
#
|
|
74
|
-
def score_for(name, source: nil)
|
|
75
|
-
scope = scores.where(name: name)
|
|
76
|
-
scope = scope.where(source: source) if source
|
|
77
|
-
scope.order(created_at: :desc).first
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
def scored?
|
|
81
|
-
scores.any?
|
|
82
|
-
end
|
|
83
|
-
|
|
73
|
+
# DatasetRunItem-specific score helpers
|
|
84
74
|
def passing_scores_count
|
|
85
75
|
scores.where("value >= 0.5").count
|
|
86
76
|
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # A queue entry asking a human to review a polymorphic `reviewable`
  # record. Each reviewable may have at most one review item.
  class ReviewItem < ApplicationRecord
    self.table_name = "observ_review_items"

    belongs_to :reviewable, polymorphic: true

    enum :status, { pending: 0, in_progress: 1, completed: 2, skipped: 3 }
    enum :priority, { normal: 0, high: 1, critical: 2 }

    validates :reviewable, presence: true
    validates :reviewable_id, uniqueness: { scope: :reviewable_type }

    # Items still waiting on a reviewer.
    scope :actionable, -> { where(status: [ :pending, :in_progress ]) }
    # Highest priority first, then oldest first within a priority.
    scope :by_priority, -> { order(priority: :desc, created_at: :asc) }
    scope :sessions, -> { where(reviewable_type: "Observ::Session") }
    scope :traces, -> { where(reviewable_type: "Observ::Trace") }

    # Mark the review as completed, stamping the time and the reviewer.
    #
    # @param by [Object, nil] value stored in completed_by
    # @raise [ActiveRecord::RecordInvalid] if the update fails validation
    def complete!(by: nil)
      update!(status: :completed, completed_at: Time.current, completed_by: by)
    end

    # Mark the review as skipped, stamping the time and the reviewer.
    #
    # @param by [Object, nil] value stored in completed_by
    # @raise [ActiveRecord::RecordInvalid] if the update fails validation
    def skip!(by: nil)
      update!(status: :skipped, completed_at: Time.current, completed_by: by)
    end

    # Move a pending item to in_progress; any other status is left unchanged.
    def start_review!
      update!(status: :in_progress) if pending?
    end

    # Badge CSS modifier class for this item's priority.
    def priority_badge_class
      case priority
      when "critical" then "observ-badge--danger"
      when "high" then "observ-badge--warning"
      else "observ-badge--secondary"
      end
    end

    # Human-readable reason, e.g. "high_cost" => "High Cost".
    # `titleize` already turns underscores into spaces; a nil or blank
    # reason is shown as "Manual".
    def reason_display
      reason.presence&.titleize || "Manual"
    end

    # Reviewable class name without its namespace, e.g. "Trace".
    # Returns "" instead of raising when the type has not been set yet.
    def reviewable_type_display
      reviewable_type.to_s.demodulize
    end
  end
end
|
data/app/models/observ/score.rb
CHANGED
|
@@ -4,8 +4,7 @@ module Observ
|
|
|
4
4
|
class Score < ApplicationRecord
|
|
5
5
|
self.table_name = "observ_scores"
|
|
6
6
|
|
|
7
|
-
belongs_to :
|
|
8
|
-
belongs_to :trace, class_name: "Observ::Trace"
|
|
7
|
+
belongs_to :scoreable, polymorphic: true
|
|
9
8
|
belongs_to :observation, class_name: "Observ::Observation", optional: true
|
|
10
9
|
|
|
11
10
|
enum :data_type, { numeric: 0, boolean: 1, categorical: 2 }
|
|
@@ -13,11 +12,44 @@ module Observ
|
|
|
13
12
|
|
|
14
13
|
validates :name, presence: true
|
|
15
14
|
validates :value, presence: true, numericality: true
|
|
16
|
-
validates :
|
|
15
|
+
validates :scoreable_id, uniqueness: {
|
|
16
|
+
scope: [ :scoreable_type, :name, :source ],
|
|
17
|
+
message: "already has a score with this name and source"
|
|
18
|
+
}
|
|
17
19
|
|
|
18
|
-
#
|
|
19
|
-
|
|
20
|
-
|
|
20
|
+
# Scopes
|
|
21
|
+
scope :for_sessions, -> { where(scoreable_type: "Observ::Session") }
|
|
22
|
+
scope :for_traces, -> { where(scoreable_type: "Observ::Trace") }
|
|
23
|
+
scope :for_dataset_run_items, -> { where(scoreable_type: "Observ::DatasetRunItem") }
|
|
24
|
+
|
|
25
|
+
# Convenience accessors for polymorphic parent
|
|
26
|
+
def dataset_run_item
|
|
27
|
+
scoreable if scoreable_type == "Observ::DatasetRunItem"
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def trace
|
|
31
|
+
case scoreable_type
|
|
32
|
+
when "Observ::Trace" then scoreable
|
|
33
|
+
when "Observ::DatasetRunItem" then scoreable.trace
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def session
|
|
38
|
+
case scoreable_type
|
|
39
|
+
when "Observ::Session" then scoreable
|
|
40
|
+
when "Observ::Trace" then scoreable.observ_session
|
|
41
|
+
when "Observ::DatasetRunItem" then scoreable.trace&.observ_session
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Delegations for backward compatibility with dataset scoring
|
|
46
|
+
def dataset_run
|
|
47
|
+
dataset_run_item&.dataset_run
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def dataset_item
|
|
51
|
+
dataset_run_item&.dataset_item
|
|
52
|
+
end
|
|
21
53
|
|
|
22
54
|
# Boolean helpers
|
|
23
55
|
def passed?
|
|
@@ -2,9 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
module Observ
|
|
4
4
|
class Session < ApplicationRecord
|
|
5
|
+
include Observ::Scoreable
|
|
6
|
+
include Observ::Reviewable
|
|
7
|
+
include Observ::JsonQueryable
|
|
8
|
+
|
|
5
9
|
self.table_name = "observ_sessions"
|
|
6
10
|
|
|
7
|
-
|
|
11
|
+
has_many :traces, class_name: "Observ::Trace",
|
|
8
12
|
foreign_key: :observ_session_id, dependent: :destroy, inverse_of: :observ_session
|
|
9
13
|
has_many :annotations, as: :annotatable, dependent: :destroy
|
|
10
14
|
|
data/app/models/observ/trace.rb
CHANGED
|
@@ -38,7 +38,6 @@ module Observ
|
|
|
38
38
|
def create_or_update_score(run_item, value)
|
|
39
39
|
score = run_item.scores.find_or_initialize_by(name: name, source: :programmatic)
|
|
40
40
|
score.assign_attributes(
|
|
41
|
-
trace: run_item.trace,
|
|
42
41
|
value: value,
|
|
43
42
|
data_type: data_type,
|
|
44
43
|
comment: options[:comment]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Rule-based guardrails that place traces and sessions on the review
  # queue. Rules are checked in declaration order and only the first
  # matching rule enqueues the record.
  class GuardrailService
    class << self
      # Enqueue the trace for review if it matches a trace rule.
      # Does nothing when the trace is already in the review queue.
      #
      # @param trace [Observ::Trace]
      # @return [nil]
      def evaluate_trace(trace)
        evaluate(trace, trace_rules)
      end

      # Enqueue the session for review if it matches a session rule.
      # Does nothing when the session is already in the review queue.
      #
      # @param session [Observ::Session]
      # @return [nil]
      def evaluate_session(session)
        evaluate(session, session_rules)
      end

      # Evaluate every trace and session created at or after `since`.
      #
      # @param since [Time] lower bound on created_at
      def evaluate_all_recent(since: 1.hour.ago)
        Observ::Trace.where(created_at: since..).find_each do |trace|
          evaluate_trace(trace)
        end

        Observ::Session.where(created_at: since..).find_each do |session|
          evaluate_session(session)
        end
      end

      # Enqueue a random sample of the last day's records from `scope` that
      # have no review item yet. At least one record is requested.
      #
      # @param scope [ActiveRecord::Relation, Class] reviewable records
      # @param percentage [Numeric] share of eligible records to sample
      # @return [nil]
      def random_sample(scope:, percentage: 5)
        items = scope.where(created_at: 1.day.ago..)
                     .left_joins(:review_item)
                     .where(observ_review_items: { id: nil })

        sample_size = [ (items.count * percentage / 100.0).ceil, 1 ].max

        # `find_each` batches by primary key and discards a custom ORDER BY,
        # which would turn the sample into "the first N ids". The set is
        # already bounded by LIMIT, so plain `each` keeps the random order.
        items.order(Arel.sql(random_function(scope))).limit(sample_size).each do |item|
          item.enqueue_for_review!(reason: "random_sample", priority: :normal)
        end

        nil
      end

      private

      # Shared rule runner: enqueue `record` for the first rule whose
      # condition holds, unless it is already queued.
      def evaluate(record, rules)
        return if record.in_review_queue?

        rule = rules.find { |candidate| candidate[:condition].call(record) }
        return unless rule

        record.enqueue_for_review!(
          reason: rule[:name].to_s,
          priority: rule[:priority],
          details: rule[:details]&.call(record) || {}
        )
        nil
      end

      # SQL random-ordering function for the scope's adapter: MySQL-family
      # adapters use RAND(); everything else gets RANDOM().
      def random_function(scope)
        scope.connection.adapter_name.match?(/mysql|mariadb|trilogy/i) ? "RAND()" : "RANDOM()"
      end

      # Trace rules in evaluation order. `details` is optional per rule.
      def trace_rules
        [
          {
            name: :error_detected,
            priority: :critical,
            condition: ->(t) { t.metadata&.dig("error").present? },
            details: ->(t) { { error: t.metadata["error"] } }
          },
          {
            name: :high_cost,
            priority: :high,
            condition: ->(t) { t.total_cost.present? && t.total_cost > thresholds[:trace_cost] },
            details: ->(t) { { cost: t.total_cost.to_f, threshold: thresholds[:trace_cost] } }
          },
          {
            name: :high_latency,
            priority: :normal,
            condition: ->(t) { t.duration_ms.present? && t.duration_ms > thresholds[:latency_ms] },
            details: ->(t) { { latency_ms: t.duration_ms, threshold: thresholds[:latency_ms] } }
          },
          {
            name: :no_output,
            priority: :high,
            condition: ->(t) { t.output.blank? && t.end_time.present? }
          },
          {
            name: :high_token_count,
            priority: :normal,
            condition: ->(t) { t.total_tokens.present? && t.total_tokens > thresholds[:tokens] },
            details: ->(t) { { tokens: t.total_tokens, threshold: thresholds[:tokens] } }
          }
        ]
      end

      # Session rules in evaluation order.
      def session_rules
        [
          {
            name: :high_cost,
            priority: :high,
            condition: ->(s) { s.total_cost.present? && s.total_cost > thresholds[:session_cost] },
            details: ->(s) { { cost: s.total_cost.to_f, threshold: thresholds[:session_cost] } }
          },
          # {
          #   name: :short_session,
          #   priority: :normal,
          #   condition: ->(s) { s.total_traces_count == 1 && s.end_time.present? },
          #   details: ->(s) { { trace_count: s.total_traces_count } }
          # },
          {
            name: :many_traces,
            priority: :normal,
            condition: ->(s) { s.total_traces_count.present? && s.total_traces_count > thresholds[:max_traces] },
            details: ->(s) { { trace_count: s.total_traces_count, threshold: thresholds[:max_traces] } }
          }
        ]
      end

      # Limits compared against by the rules above.
      def thresholds
        @thresholds ||= {
          trace_cost: 0.10,
          session_cost: 0.50,
          latency_ms: 30_000,
          tokens: 10_000,
          max_traces: 20
        }
      end
    end
  end
end
|
|
@@ -5,4 +5,4 @@
|
|
|
5
5
|
per_page: number of items to fetch per page
|
|
6
6
|
remote: data-remote
|
|
7
7
|
-%>
|
|
8
|
-
<span class="
|
|
8
|
+
<span class="observ-pagination__page observ-pagination__page--gap"><%= t('views.pagination.truncate').html_safe %></span>
|
|
@@ -7,6 +7,6 @@
|
|
|
7
7
|
per_page: number of items to fetch per page
|
|
8
8
|
remote: data-remote
|
|
9
9
|
-%>
|
|
10
|
-
<span class="
|
|
10
|
+
<span class="observ-pagination__page<%= ' observ-pagination__page--current' if page.current? %>">
|
|
11
11
|
<%= link_to_unless page.current?, page, url, {remote: remote, rel: page.rel} %>
|
|
12
12
|
</span>
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
paginator: the paginator that renders the pagination tags inside
|
|
8
8
|
-%>
|
|
9
9
|
<%= paginator.render do -%>
|
|
10
|
-
<nav class="pagination" role="navigation" aria-label="pager">
|
|
10
|
+
<nav class="observ-pagination" role="navigation" aria-label="pager">
|
|
11
11
|
<%= first_page_tag unless current_page.first? %>
|
|
12
12
|
<%= prev_page_tag unless current_page.first? %>
|
|
13
13
|
<% each_page do |page| -%>
|
|
@@ -6,6 +6,6 @@
|
|
|
6
6
|
per_page: number of items to fetch per page
|
|
7
7
|
remote: data-remote
|
|
8
8
|
-%>
|
|
9
|
-
<span class="
|
|
9
|
+
<span class="observ-pagination__prev">
|
|
10
10
|
<%= link_to_unless current_page.first?, t('views.pagination.previous').html_safe, url, rel: 'prev', remote: remote %>
|
|
11
11
|
</span>
|
|
@@ -5,4 +5,4 @@
|
|
|
5
5
|
per_page: number of items to fetch per page
|
|
6
6
|
remote: data-remote
|
|
7
7
|
-%>
|
|
8
|
-
<span class="
|
|
8
|
+
<span class="observ-pagination__page observ-pagination__page--gap"><%= t('views.pagination.truncate').html_safe %></span>
|
|
@@ -7,6 +7,6 @@
|
|
|
7
7
|
per_page: number of items to fetch per page
|
|
8
8
|
remote: data-remote
|
|
9
9
|
-%>
|
|
10
|
-
<span class="
|
|
10
|
+
<span class="observ-pagination__page<%= ' observ-pagination__page--current' if page.current? %>">
|
|
11
11
|
<%= link_to_unless page.current?, page, url, {remote: remote, rel: page.rel} %>
|
|
12
12
|
</span>
|