rubyllm-observ 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. checksums.yaml +4 -4
  2. data/README.md +319 -1
  3. data/app/assets/javascripts/observ/controllers/config_editor_controller.js +178 -0
  4. data/app/assets/javascripts/observ/controllers/index.js +29 -0
  5. data/app/assets/javascripts/observ/controllers/message_form_controller.js +24 -2
  6. data/app/assets/stylesheets/observ/_chat.scss +199 -0
  7. data/app/assets/stylesheets/observ/_config_editor.scss +119 -0
  8. data/app/assets/stylesheets/observ/application.scss +1 -0
  9. data/app/controllers/observ/annotations_controller.rb +2 -2
  10. data/app/controllers/observ/chats_controller.rb +1 -1
  11. data/app/controllers/observ/dataset_items_controller.rb +3 -3
  12. data/app/controllers/observ/dataset_runs_controller.rb +3 -3
  13. data/app/controllers/observ/datasets_controller.rb +4 -4
  14. data/app/controllers/observ/messages_controller.rb +5 -1
  15. data/app/controllers/observ/prompts_controller.rb +14 -6
  16. data/app/controllers/observ/review_queue_controller.rb +1 -1
  17. data/app/controllers/observ/scores_controller.rb +1 -1
  18. data/app/controllers/observ/traces_controller.rb +1 -1
  19. data/app/helpers/observ/application_helper.rb +1 -0
  20. data/app/helpers/observ/dashboard_helper.rb +2 -2
  21. data/app/helpers/observ/markdown_helper.rb +29 -0
  22. data/app/helpers/observ/pagination_helper.rb +1 -1
  23. data/app/helpers/observ/prompts_helper.rb +48 -0
  24. data/app/jobs/observ/moderation_guardrail_job.rb +115 -0
  25. data/app/models/concerns/observ/prompt_management.rb +10 -0
  26. data/app/models/observ/embedding.rb +45 -0
  27. data/app/models/observ/image_generation.rb +38 -0
  28. data/app/models/observ/moderation.rb +40 -0
  29. data/app/models/observ/null_prompt.rb +49 -2
  30. data/app/models/observ/observation.rb +3 -1
  31. data/app/models/observ/prompt.rb +2 -2
  32. data/app/models/observ/review_item.rb +1 -1
  33. data/app/models/observ/score.rb +1 -1
  34. data/app/models/observ/session.rb +33 -0
  35. data/app/models/observ/trace.rb +90 -4
  36. data/app/models/observ/transcription.rb +38 -0
  37. data/app/presenters/observ/agent_select_presenter.rb +3 -3
  38. data/app/services/observ/chat_instrumenter.rb +97 -7
  39. data/app/services/observ/concerns/observable_service.rb +108 -3
  40. data/app/services/observ/dataset_runner_service.rb +1 -1
  41. data/app/services/observ/embedding_instrumenter.rb +193 -0
  42. data/app/services/observ/evaluator_runner_service.rb +1 -1
  43. data/app/services/observ/evaluators/contains_evaluator.rb +1 -1
  44. data/app/services/observ/guardrail_service.rb +10 -1
  45. data/app/services/observ/image_generation_instrumenter.rb +243 -0
  46. data/app/services/observ/moderation_guardrail_service.rb +239 -0
  47. data/app/services/observ/moderation_instrumenter.rb +141 -0
  48. data/app/services/observ/prompt_manager/caching.rb +15 -2
  49. data/app/services/observ/transcription_instrumenter.rb +187 -0
  50. data/app/validators/observ/prompt_config_validator.rb +5 -5
  51. data/app/views/observ/chats/show.html.erb +9 -0
  52. data/app/views/observ/messages/_message.html.erb +1 -1
  53. data/app/views/observ/messages/create.turbo_stream.erb +1 -3
  54. data/app/views/observ/prompts/_config_editor.html.erb +115 -0
  55. data/app/views/observ/prompts/_form.html.erb +2 -13
  56. data/app/views/observ/prompts/_new_form.html.erb +2 -12
  57. data/config/routes.rb +13 -13
  58. data/db/migrate/005_create_observ_prompts.rb +2 -2
  59. data/db/migrate/011_create_observ_dataset_items.rb +1 -1
  60. data/db/migrate/012_create_observ_dataset_runs.rb +2 -2
  61. data/db/migrate/013_create_observ_dataset_run_items.rb +1 -1
  62. data/db/migrate/014_create_observ_scores.rb +2 -2
  63. data/db/migrate/015_refactor_scores_to_polymorphic.rb +2 -2
  64. data/db/migrate/016_create_observ_review_items.rb +2 -2
  65. data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +9 -3
  66. data/lib/observ/engine.rb +7 -0
  67. data/lib/observ/version.rb +1 -1
  68. data/lib/tasks/observ_tasks.rake +2 -2
  69. metadata +33 -3
@@ -0,0 +1,243 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Records every +RubyLLM.paint+ call as a trace plus an image-generation
  # observation on the given session.
  #
  # Instrumentation works by replacing the +paint+ singleton method on the
  # +RubyLLM+ module with a wrapper; +uninstrument!+ puts the captured method
  # back.
  #
  # NOTE(review): the replacement is made on the +RubyLLM+ module itself, so
  # while active it intercepts every caller of +RubyLLM.paint+ in the process,
  # not only code belonging to this session — confirm that is intended.
  class ImageGenerationInstrumenter
    # Hardcoded pricing for image generation models (USD per image).
    # Prices are organized by model_id, then by size, then by quality.
    # Source: https://openai.com/pricing, https://cloud.google.com/vertex-ai/pricing
    IMAGE_PRICING = {
      # OpenAI DALL-E 3 (size and quality based)
      # Quality options: "standard", "hd"
      "dall-e-3" => {
        "1024x1024" => { "standard" => 0.04, "hd" => 0.08 },
        "1792x1024" => { "standard" => 0.08, "hd" => 0.12 },
        "1024x1792" => { "standard" => 0.08, "hd" => 0.12 }
      },
      # OpenAI DALL-E 2 (size based, no quality option)
      "dall-e-2" => {
        "1024x1024" => { "default" => 0.02 },
        "512x512" => { "default" => 0.018 },
        "256x256" => { "default" => 0.016 }
      },
      # OpenAI GPT-image-1 (token-based, estimated per-image costs)
      # Quality options: "low", "medium", "high" (maps "standard" -> "medium")
      # Source: "Image outputs cost approximately $0.01 (low), $0.04 (medium), $0.17 (high) for square images"
      # Larger sizes are estimated at ~1.7x for 1792x1024 and ~2.9x for 1792x1792
      "gpt-image-1" => {
        "1024x1024" => { "low" => 0.01, "medium" => 0.04, "high" => 0.17 },
        "1792x1024" => { "low" => 0.017, "medium" => 0.068, "high" => 0.29 },
        "1024x1792" => { "low" => 0.017, "medium" => 0.068, "high" => 0.29 },
        "1792x1792" => { "low" => 0.029, "medium" => 0.116, "high" => 0.49 },
        "default" => { "low" => 0.01, "medium" => 0.04, "high" => 0.17 }
      },
      # OpenAI GPT-image-1-mini (token-based, estimated per-image costs)
      # Approximately 5x cheaper than gpt-image-1 based on token pricing ratio
      "gpt-image-1-mini" => {
        "1024x1024" => { "low" => 0.002, "medium" => 0.008, "high" => 0.034 },
        "1792x1024" => { "low" => 0.0034, "medium" => 0.0136, "high" => 0.058 },
        "1024x1792" => { "low" => 0.0034, "medium" => 0.0136, "high" => 0.058 },
        "1792x1792" => { "low" => 0.0058, "medium" => 0.0232, "high" => 0.098 },
        "default" => { "low" => 0.002, "medium" => 0.008, "high" => 0.034 }
      },
      # Google Imagen models (flat rate per image)
      "imagen-3.0-generate-002" => {
        "default" => { "default" => 0.04 }
      },
      "imagen-4.0-generate-001" => {
        "default" => { "default" => 0.04 }
      },
      "imagen-4.0-generate-preview-06-06" => {
        "default" => { "default" => 0.04 }
      },
      "imagen-4.0-ultra-generate-preview-06-06" => {
        "default" => { "default" => 0.08 }
      }
    }.freeze

    # Maps quality names between different conventions.
    # DALL-E uses: "standard", "hd"
    # GPT-image uses: "low", "medium", "high"
    QUALITY_MAPPINGS = {
      "standard" => "medium", # Map DALL-E "standard" to GPT-image "medium"
      "hd" => "high"          # Map DALL-E "hd" to GPT-image "high"
    }.freeze

    # Values assumed when the caller of RubyLLM.paint omits the option.
    DEFAULT_SIZE = "1024x1024"
    DEFAULT_QUALITY = "standard"
    # Model recorded when neither the call nor RubyLLM.config names one.
    FALLBACK_MODEL = "dall-e-3"

    attr_reader :session, :context

    # @param session [#create_trace, #session_id] session that receives the traces
    # @param context [Hash] extra metadata merged into every trace
    def initialize(session, context: {})
      @session = session
      @context = context
      @original_paint_method = nil
      @instrumented = false
    end

    # Replaces RubyLLM.paint with the recording wrapper. No-op when already
    # instrumented.
    def instrument!
      return if @instrumented

      wrap_paint_method
      @instrumented = true

      Rails.logger.info "[Observability] Instrumented RubyLLM.paint for session #{session.session_id}"
    end

    # Restores the RubyLLM.paint implementation captured by #instrument!.
    # No-op when not instrumented.
    def uninstrument!
      return unless @instrumented
      return unless @original_paint_method

      RubyLLM.define_singleton_method(:paint, @original_paint_method)
      @instrumented = false

      Rails.logger.info "[Observability] Uninstrumented RubyLLM.paint"
    end

    private

    # Captures the current RubyLLM.paint and installs the wrapper.
    #
    # There is deliberately no "already captured" guard here: #instrument! is
    # the only caller and already checks @instrumented. The previous guard made
    # instrument! -> uninstrument! -> instrument! a silent no-op, because the
    # captured method was never cleared and the wrapper was never re-installed.
    def wrap_paint_method
      @original_paint_method = RubyLLM.method(:paint)
      instrumenter = self

      RubyLLM.define_singleton_method(:paint) do |*args, **kwargs, &block|
        instrumenter.send(:handle_paint_call, args, kwargs, &block)
      end
    end

    # Runs the real paint call between trace/observation creation and
    # finalization. Any StandardError is recorded (best effort) and re-raised.
    #
    # @param args [Array] positional arguments given to RubyLLM.paint (prompt first)
    # @param kwargs [Hash] keyword arguments given to RubyLLM.paint
    # @return [Object] whatever the original RubyLLM.paint returned
    def handle_paint_call(args, kwargs, &block)
      prompt = args.first
      model_id = kwargs[:model] || default_image_model
      size = kwargs[:size] || DEFAULT_SIZE
      quality = kwargs[:quality] || DEFAULT_QUALITY

      trace = session.create_trace(
        name: "image_generation",
        input: { prompt: prompt },
        metadata: @context.merge(model: model_id, size: size, quality: quality).compact
      )

      image_obs = trace.create_image_generation(
        name: "paint",
        model: model_id,
        metadata: { size: size, quality: quality }.compact
      )

      result = @original_paint_method.call(*args, **kwargs, &block)

      finalize_image_generation(image_obs, result, prompt, size: size, quality: quality)
      trace.finalize(
        output: format_output(result),
        metadata: { size: extract_size(result) || size, quality: quality }
      )

      result
    rescue StandardError => e
      # trace / image_obs are nil here when the failure happened before they
      # were assigned; handle_error copes with both.
      handle_error(e, trace, image_obs)
      raise
    end

    # Stores output, cost and result-derived metadata on the observation.
    def finalize_image_generation(image_obs, result, prompt, size:, quality:)
      image_obs.finalize(
        output: format_output(result),
        usage: {},
        cost_usd: calculate_cost(result, size: size, quality: quality)
      )

      image_obs.update!(
        input: prompt,
        metadata: image_obs.metadata.merge(
          revised_prompt: result.revised_prompt,
          output_format: result.base64? ? "base64" : "url",
          mime_type: result.mime_type,
          size: extract_size(result) || size,
          quality: quality
        ).compact
      )
    end

    # @return [Float] estimated USD cost; 0.0 when the model is unknown or the
    #   lookup itself fails
    def calculate_cost(result, size:, quality:)
      model_id = result.model_id
      return 0.0 unless model_id

      lookup_image_price(model_id, size, quality)
    rescue StandardError => e
      Rails.logger.warn "[Observability] Failed to calculate image generation cost: #{e.message}"
      0.0
    end

    # Resolves a per-image price from IMAGE_PRICING.
    #
    # Lookup order for quality: exact match, QUALITY_MAPPINGS translation,
    # then "standard", "medium", "default", first listed price, 0.0.
    # Arguments are stringified first so symbol options such as
    # +quality: :hd+ hit the string-keyed table instead of silently falling
    # through to the fallback price.
    #
    # @return [Float]
    def lookup_image_price(model_id, size, quality)
      model_pricing = IMAGE_PRICING[model_id.to_s]
      return 0.0 unless model_pricing

      # Try exact size match, then "default"
      size_pricing = model_pricing[size.to_s] || model_pricing["default"]
      return 0.0 unless size_pricing

      quality_key = quality.to_s

      size_pricing[quality_key] ||
        size_pricing[QUALITY_MAPPINGS[quality_key]] ||
        size_pricing["standard"] ||
        size_pricing["medium"] ||
        size_pricing["default"] ||
        size_pricing.values.first ||
        0.0
    end

    # @return [Object, nil] result.size when the result exposes it
    def extract_size(result)
      result.respond_to?(:size) ? result.size : nil
    end

    # Summary of the result stored as trace/observation output (nil entries dropped).
    def format_output(result)
      {
        model: result.model_id,
        has_url: result.respond_to?(:url) && result.url.present?,
        base64: result.base64?,
        mime_type: result.mime_type,
        revised_prompt: result.revised_prompt
      }.compact
    end

    # @return [String] RubyLLM.config.default_image_model when that setting
    #   exists, otherwise FALLBACK_MODEL
    def default_image_model
      if RubyLLM.config.respond_to?(:default_image_model)
        RubyLLM.config.default_image_model
      else
        FALLBACK_MODEL
      end
    end

    # Best-effort recording of a failed paint call: adds an "error" span to the
    # trace and marks the observation as FAILED.
    #
    # This runs inside the rescue of #handle_paint_call, so it must never raise:
    # a failure while recording would replace the caller's real exception.
    def handle_error(error, trace, image_obs)
      return unless trace

      error_span = trace.create_span(
        name: "error",
        metadata: {
          error_type: error.class.name,
          level: "ERROR"
        },
        input: {
          error_message: error.message,
          backtrace: error.backtrace&.first(10)
        }.to_json
      )
      error_span.finalize(output: { error_captured: true }.to_json)

      mark_observation_failed(image_obs)

      Rails.logger.error "[Observability] Image generation error captured: #{error.class.name} - #{error.message}"
    rescue StandardError => e
      Rails.logger.warn "[Observability] Failed to record image generation error: #{e.class.name} - #{e.message}"
    end

    # Flags the observation as failed; a failure to do so is logged, not raised.
    def mark_observation_failed(image_obs)
      image_obs&.update(status_message: "FAILED")
    rescue StandardError => e
      Rails.logger.warn "[Observability] Failed to mark image generation as FAILED: #{e.message}"
    end
  end
end
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Runs RubyLLM moderation over trace or session content and enqueues the
  # trace/session for human review when the result crosses a threshold.
  #
  # Every public entry point returns a Result whose action is :flagged,
  # :passed or :skipped; errors are logged and reported as :skipped.
  class ModerationGuardrailService
    include Observ::Concerns::ObservableService

    # Score thresholds for different actions
    THRESHOLDS = {
      critical: 0.9, # Auto-flag as critical
      high: 0.7,     # Flag as high priority
      review: 0.5    # Flag for normal review
    }.freeze

    # Categories that always trigger critical review
    CRITICAL_CATEGORIES = %w[
      sexual/minors
      self-harm/intent
      self-harm/instructions
      violence/graphic
    ].freeze

    # Hash keys probed (in this order) when pulling text out of structured content.
    TEXT_KEYS = ["text", "content", "message", :text, :content, :message].freeze
    # Separator placed between the individual input/output texts.
    CONTENT_SEPARATOR = "\n\n---\n\n"
    # Maximum characters of aggregated session content sent to moderation.
    SESSION_CONTENT_LIMIT = 10_000

    # Outcome of a guardrail evaluation.
    class Result
      attr_reader :action, :reason, :priority, :details

      # @param action [Symbol] :flagged, :skipped or :passed
      # @param reason [String, nil] why the evaluation was skipped
      # @param priority [Symbol, nil] review priority when flagged
      # @param details [Hash] moderation details or error information
      def initialize(action:, reason: nil, priority: nil, details: {})
        @action = action
        @reason = reason
        @priority = priority
        @details = details
      end

      def flagged? = action == :flagged
      def skipped? = action == :skipped
      def passed? = action == :passed
    end

    # @param observability_session [Object, nil] forwarded to the
    #   ObservableService concern's initialize_observability
    def initialize(observability_session: nil)
      initialize_observability(
        observability_session,
        service_name: "moderation_guardrail",
        metadata: {}
      )
    end

    # Evaluate a trace for moderation issues
    #
    # @param trace [Observ::Trace] The trace to evaluate
    # @param moderate_input [Boolean] Whether to moderate input content
    # @param moderate_output [Boolean] Whether to moderate output content
    # @return [Result] The evaluation result
    def evaluate_trace(trace, moderate_input: true, moderate_output: true)
      return Result.new(action: :skipped, reason: "already_in_queue") if trace.in_review_queue?
      return Result.new(action: :skipped, reason: "already_has_moderation") if has_existing_flags?(trace)

      with_observability do |_session|
        content = extract_trace_content(
          trace,
          moderate_input: moderate_input,
          moderate_output: moderate_output
        )
        return Result.new(action: :skipped, reason: "no_content") if content.blank?

        perform_moderation(trace, content)
      end
    rescue StandardError => e
      Rails.logger.error "[ModerationGuardrailService] Failed to evaluate trace #{trace.id}: #{e.message}"
      Result.new(action: :skipped, reason: "error", details: { error: e.message })
    end

    # Evaluate all traces in a session
    #
    # @param session [Observ::Session] The session to evaluate
    # @return [Array<Result>] Results for each trace (empty when there are none)
    def evaluate_session(session)
      # map already yields [] for an empty collection, so no separate
      # emptiness check is needed.
      session.traces.map { |trace| evaluate_trace(trace) }
    end

    # Evaluate session-level content (aggregated input/output)
    #
    # @param session [Observ::Session] The session to evaluate
    # @return [Result] The evaluation result
    def evaluate_session_content(session)
      return Result.new(action: :skipped, reason: "already_in_queue") if session.in_review_queue?

      with_observability do |_session|
        content = extract_session_content(session)
        return Result.new(action: :skipped, reason: "no_content") if content.blank?

        perform_session_moderation(session, content)
      end
    rescue StandardError => e
      Rails.logger.error "[ModerationGuardrailService] Failed to evaluate session #{session.id}: #{e.message}"
      Result.new(action: :skipped, reason: "error", details: { error: e.message })
    end

    private

    # @return [Boolean] true when any moderation already attached to the trace is flagged
    def has_existing_flags?(trace)
      trace.moderations.any?(&:flagged?)
    end

    # Joins the selected trace input/output texts, skipping blank parts.
    def extract_trace_content(trace, moderate_input:, moderate_output:)
      parts = []
      parts << extract_text(trace.input) if moderate_input
      parts << extract_text(trace.output) if moderate_output
      join_parts(parts)
    end

    # Joins the input/output text of every trace in the session, capped at
    # SESSION_CONTENT_LIMIT characters.
    def extract_session_content(session)
      parts = session.traces.flat_map do |trace|
        [extract_text(trace.input), extract_text(trace.output)]
      end
      join_parts(parts).truncate(SESSION_CONTENT_LIMIT)
    end

    def join_parts(parts)
      parts.compact.reject(&:blank?).join(CONTENT_SEPARATOR)
    end

    # Reduces stored trace content to something moderatable.
    #
    # @param content [String, Hash, Object, nil]
    # @return [Object, nil] the string itself, the first truthy value under one
    #   of TEXT_KEYS (falling back to the hash as JSON), +to_s+ for anything
    #   else, or nil for blank content
    def extract_text(content)
      return nil if content.blank?

      case content
      when String
        content
      when Hash
        TEXT_KEYS.lazy.map { |key| content[key] }.find(&:itself) || content.to_json
      else
        content.to_s
      end
    end

    def perform_moderation(trace, content)
      moderate_and_enqueue(trace, content, trace_id: trace.id)
    end

    def perform_session_moderation(session, content)
      moderate_and_enqueue(session, content, session_id: session.id)
    end

    # Shared trace/session path: announce the moderation call to the
    # observability concern, moderate the content, then act on the result.
    #
    # @param subject [#enqueue_for_review!] trace or session being evaluated
    # @param content [String] text sent to RubyLLM.moderate
    # @param subject_ref [Hash] identifying context entry (trace_id: / session_id:)
    # @return [Result]
    def moderate_and_enqueue(subject, content, **subject_ref)
      instrument_moderation(context: {
        service: "moderation_guardrail",
        **subject_ref,
        content_length: content.length
      })

      evaluate_and_enqueue(subject, RubyLLM.moderate(content))
    end

    # Enqueues the subject for review when the moderation result warrants a
    # priority; otherwise reports a pass.
    #
    # @param subject [#enqueue_for_review!] trace or session
    # @return [Result]
    def evaluate_and_enqueue(subject, moderation_result)
      priority = determine_priority(moderation_result)
      return Result.new(action: :passed) unless priority

      details = build_details(moderation_result)
      subject.enqueue_for_review!(
        reason: "content_moderation",
        priority: priority,
        details: details
      )

      Result.new(action: :flagged, priority: priority, details: details)
    end
    # Sessions and traces are handled identically; the old name is kept.
    alias_method :evaluate_and_enqueue_session, :evaluate_and_enqueue

    # Maps a moderation result to a review priority.
    #
    # - any flagged category in CRITICAL_CATEGORIES => :critical
    # - flagged => :critical at or above the critical threshold, else :high
    # - not flagged => :high / :normal by score threshold, nil below :review
    #
    # Missing (nil) category lists or scores are treated as empty so an
    # incomplete provider response cannot crash the evaluation.
    #
    # @return [Symbol, nil]
    def determine_priority(result)
      return :critical if (Array(result.flagged_categories) & CRITICAL_CATEGORIES).any?

      max_score = highest_score(result) || 0

      if result.flagged?
        max_score >= THRESHOLDS[:critical] ? :critical : :high
      elsif max_score >= THRESHOLDS[:high]
        :high
      elsif max_score >= THRESHOLDS[:review]
        :normal
      end
    end

    # @return [Hash] details stored with the review item; scores rounded to 4 places
    def build_details(result)
      {
        flagged: result.flagged?,
        flagged_categories: result.flagged_categories,
        highest_category: highest_category(result),
        highest_score: highest_score(result)&.round(4),
        category_scores: scores_for(result).transform_values { |score| score&.round(4) }
      }
    end

    # @return [Object, nil] name of the category with the largest score
    def highest_category(result)
      scores_for(result).compact.max_by { |_, score| score }&.first
    end

    # @return [Numeric, nil] largest category score, ignoring nil entries
    def highest_score(result)
      scores_for(result).values.compact.max
    end

    # @return [Hash] category scores, or an empty hash when the result has none
    def scores_for(result)
      result.category_scores || {}
    end
  end
end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Records every +RubyLLM.moderate+ call as a trace plus a moderation
  # observation on the given session.
  #
  # Instrumentation works by replacing the +moderate+ singleton method on the
  # +RubyLLM+ module with a wrapper; +uninstrument!+ puts the captured method
  # back.
  #
  # NOTE(review): the replacement is made on the +RubyLLM+ module itself, so
  # while active it intercepts every caller of +RubyLLM.moderate+ in the
  # process — confirm that is intended.
  class ModerationInstrumenter
    # Characters of the moderated text kept on the trace input.
    TRACE_INPUT_LIMIT = 500
    # Characters of the moderated text kept on the observation input.
    OBSERVATION_INPUT_LIMIT = 1000
    # Model recorded when neither the call nor RubyLLM.config names one.
    FALLBACK_MODEL = "omni-moderation-latest"

    attr_reader :session, :context

    # @param session [#create_trace, #session_id] session that receives the traces
    # @param context [Hash] extra metadata merged into every trace
    def initialize(session, context: {})
      @session = session
      @context = context
      @original_moderate_method = nil
      @instrumented = false
    end

    # Replaces RubyLLM.moderate with the recording wrapper. No-op when already
    # instrumented.
    def instrument!
      return if @instrumented

      wrap_moderate_method
      @instrumented = true

      Rails.logger.info "[Observability] Instrumented RubyLLM.moderate for session #{session.session_id}"
    end

    # Restores the RubyLLM.moderate implementation captured by #instrument!.
    # No-op when not instrumented.
    def uninstrument!
      return unless @instrumented
      return unless @original_moderate_method

      RubyLLM.define_singleton_method(:moderate, @original_moderate_method)
      @instrumented = false

      Rails.logger.info "[Observability] Uninstrumented RubyLLM.moderate"
    end

    private

    # Captures the current RubyLLM.moderate and installs the wrapper.
    #
    # There is deliberately no "already captured" guard here: #instrument! is
    # the only caller and already checks @instrumented. The previous guard made
    # instrument! -> uninstrument! -> instrument! a silent no-op, because the
    # captured method was never cleared and the wrapper was never re-installed.
    def wrap_moderate_method
      @original_moderate_method = RubyLLM.method(:moderate)
      instrumenter = self

      RubyLLM.define_singleton_method(:moderate) do |*args, **kwargs, &block|
        instrumenter.send(:handle_moderate_call, args, kwargs, &block)
      end
    end

    # Runs the real moderate call between trace/observation creation and
    # finalization. Any StandardError is recorded (best effort) and re-raised.
    #
    # @param args [Array] positional arguments given to RubyLLM.moderate (text first)
    # @param kwargs [Hash] keyword arguments given to RubyLLM.moderate
    # @return [Object] whatever the original RubyLLM.moderate returned
    def handle_moderate_call(args, kwargs, &block)
      text = args.first
      model_id = kwargs[:model] || default_moderation_model

      trace = session.create_trace(
        name: "moderation",
        input: { text: text&.truncate(TRACE_INPUT_LIMIT) },
        metadata: @context.merge(model: model_id).compact
      )

      moderation_obs = trace.create_moderation(
        name: "moderate",
        model: model_id,
        metadata: {}
      )

      result = @original_moderate_method.call(*args, **kwargs, &block)

      finalize_moderation(moderation_obs, result, text)
      trace.finalize(
        output: format_output(result),
        metadata: extract_trace_metadata(result)
      )

      result
    rescue StandardError => e
      # trace / moderation_obs are nil here when the failure happened before
      # they were assigned; handle_error copes with both.
      handle_error(e, trace, moderation_obs)
      raise
    end

    # Stores output and the moderation verdict on the observation.
    def finalize_moderation(moderation_obs, result, text)
      moderation_obs.finalize(
        output: format_output(result),
        usage: {},
        cost_usd: 0.0 # Moderation is typically free
      )

      moderation_obs.update!(
        input: text&.truncate(OBSERVATION_INPUT_LIMIT),
        metadata: moderation_obs.metadata.merge(
          flagged: result.flagged?,
          categories: result.categories,
          category_scores: result.category_scores,
          flagged_categories: result.flagged_categories
        ).compact
      )
    end

    # Summary of the result stored as trace/observation output (nil entries dropped).
    def format_output(result)
      {
        model: result.model,
        flagged: result.flagged?,
        flagged_categories: result.flagged_categories,
        id: result.respond_to?(:id) ? result.id : nil
      }.compact
    end

    # Metadata merged into the trace on finalization.
    def extract_trace_metadata(result)
      {
        flagged: result.flagged?,
        flagged_categories_count: result.flagged_categories&.count || 0
      }.compact
    end

    # @return [String] RubyLLM.config.default_moderation_model when that
    #   setting exists, otherwise FALLBACK_MODEL
    def default_moderation_model
      if RubyLLM.config.respond_to?(:default_moderation_model)
        RubyLLM.config.default_moderation_model
      else
        FALLBACK_MODEL
      end
    end

    # Best-effort recording of a failed moderate call: adds an "error" span to
    # the trace and marks the observation as FAILED.
    #
    # This runs inside the rescue of #handle_moderate_call, so it must never
    # raise: a failure while recording would replace the caller's real exception.
    def handle_error(error, trace, moderation_obs)
      return unless trace

      error_span = trace.create_span(
        name: "error",
        metadata: {
          error_type: error.class.name,
          level: "ERROR"
        },
        input: {
          error_message: error.message,
          backtrace: error.backtrace&.first(10)
        }.to_json
      )
      error_span.finalize(output: { error_captured: true }.to_json)

      mark_observation_failed(moderation_obs)

      Rails.logger.error "[Observability] Moderation error captured: #{error.class.name} - #{error.message}"
    rescue StandardError => e
      Rails.logger.warn "[Observability] Failed to record moderation error: #{e.class.name} - #{e.message}"
    end

    # Flags the observation as failed; a failure to do so is logged, not raised.
    def mark_observation_failed(moderation_obs)
      moderation_obs&.update(status_message: "FAILED")
    rescue StandardError => e
      Rails.logger.warn "[Observability] Failed to mark moderation as FAILED: #{e.message}"
    end
  end
end
@@ -82,13 +82,14 @@ module Observ
82
82
  # @return [Boolean] true if successful
83
83
  def invalidate_cache(name:, version: nil)
84
84
  keys = if version
85
- [ cache_key(name: name, version: version) ]
85
+ [cache_key(name: name, version: version)]
86
86
  else
87
87
  # Invalidate all state-based keys for this prompt
88
- [ :draft, :production, :archived ].map { |state| cache_key(name: name, state: state) }
88
+ [:draft, :production, :archived].map { |state| cache_key(name: name, state: state) }
89
89
  end
90
90
 
91
91
  keys.each { |key| Rails.cache.delete(key) }
92
+ bump_cache_stamp(name: name)
92
93
  Rails.logger.info("Cache invalidated for #{name}#{version ? " v#{version}" : ""}")
93
94
 
94
95
  true
@@ -121,6 +122,18 @@ module Observ
121
122
  results
122
123
  end
123
124
 
125
# Builds the cache key under which a prompt's stamp value is stored.
#
# @param name [String] prompt name
# @return [String] "<prompt_cache_namespace>:<name>:stamp"
def cache_stamp_key(name:)
  namespace = Observ.config.prompt_cache_namespace
  format("%s:%s:stamp", namespace, name)
end
128
+
129
# Reads the stamp currently stored for a prompt.
#
# @param name [String] prompt name
# @return [Object, nil] whatever Rails.cache holds under the stamp key
#   (bump_cache_stamp writes a Float timestamp there)
def cache_stamp(name:)
  stamp_key = cache_stamp_key(name: name)
  Rails.cache.read(stamp_key)
end
132
+
133
# Overwrites the prompt's stamp with the current time as a Float.
#
# @param name [String] prompt name
# @return [Object] the return value of Rails.cache.write
def bump_cache_stamp(name:)
  stamp_key = cache_stamp_key(name: name)
  Rails.cache.write(stamp_key, Time.current.to_f)
end
136
+
124
137
  # Get list of critical prompts (prompts used by agents)
125
138
  # @return [Array<String>] Array of prompt names
126
139
  def critical_prompt_names