rubyllm-observ 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +778 -0
  3. data/Rakefile +49 -0
  4. data/app/assets/javascripts/observ/application.js +12 -0
  5. data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
  6. data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
  7. data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
  8. data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
  9. data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
  10. data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
  11. data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
  12. data/app/assets/javascripts/observ/controllers/index.js +52 -0
  13. data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
  14. data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
  15. data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
  16. data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
  17. data/app/assets/stylesheets/observ/_annotations.scss +127 -0
  18. data/app/assets/stylesheets/observ/_card.scss +52 -0
  19. data/app/assets/stylesheets/observ/_chat.scss +156 -0
  20. data/app/assets/stylesheets/observ/_components.scss +460 -0
  21. data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
  22. data/app/assets/stylesheets/observ/_datasets.scss +697 -0
  23. data/app/assets/stylesheets/observ/_drawer.scss +273 -0
  24. data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
  25. data/app/assets/stylesheets/observ/_layout.scss +256 -0
  26. data/app/assets/stylesheets/observ/_metrics.scss +99 -0
  27. data/app/assets/stylesheets/observ/_observations.scss +160 -0
  28. data/app/assets/stylesheets/observ/_pagination.scss +143 -0
  29. data/app/assets/stylesheets/observ/_prompts.scss +365 -0
  30. data/app/assets/stylesheets/observ/_table.scss +53 -0
  31. data/app/assets/stylesheets/observ/_variables.scss +53 -0
  32. data/app/assets/stylesheets/observ/application.scss +15 -0
  33. data/app/controllers/observ/annotations_controller.rb +144 -0
  34. data/app/controllers/observ/application_controller.rb +8 -0
  35. data/app/controllers/observ/chats_controller.rb +58 -0
  36. data/app/controllers/observ/dashboard_controller.rb +159 -0
  37. data/app/controllers/observ/dataset_items_controller.rb +85 -0
  38. data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
  39. data/app/controllers/observ/dataset_runs_controller.rb +110 -0
  40. data/app/controllers/observ/datasets_controller.rb +74 -0
  41. data/app/controllers/observ/messages_controller.rb +26 -0
  42. data/app/controllers/observ/observations_controller.rb +59 -0
  43. data/app/controllers/observ/prompt_versions_controller.rb +148 -0
  44. data/app/controllers/observ/prompts_controller.rb +205 -0
  45. data/app/controllers/observ/sessions_controller.rb +45 -0
  46. data/app/controllers/observ/traces_controller.rb +86 -0
  47. data/app/forms/observ/prompt_form.rb +96 -0
  48. data/app/helpers/observ/application_helper.rb +9 -0
  49. data/app/helpers/observ/chats_helper.rb +47 -0
  50. data/app/helpers/observ/dashboard_helper.rb +154 -0
  51. data/app/helpers/observ/datasets_helper.rb +62 -0
  52. data/app/helpers/observ/pagination_helper.rb +38 -0
  53. data/app/jobs/observ/application_job.rb +4 -0
  54. data/app/jobs/observ/dataset_runner_job.rb +49 -0
  55. data/app/mailers/observ/application_mailer.rb +6 -0
  56. data/app/models/concerns/observ/agent_phaseable.rb +124 -0
  57. data/app/models/concerns/observ/agent_selectable.rb +50 -0
  58. data/app/models/concerns/observ/chat_enhancements.rb +109 -0
  59. data/app/models/concerns/observ/message_enhancements.rb +31 -0
  60. data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
  61. data/app/models/concerns/observ/prompt_management.rb +320 -0
  62. data/app/models/concerns/observ/trace_association.rb +9 -0
  63. data/app/models/observ/annotation.rb +23 -0
  64. data/app/models/observ/application_record.rb +5 -0
  65. data/app/models/observ/dataset.rb +51 -0
  66. data/app/models/observ/dataset_item.rb +41 -0
  67. data/app/models/observ/dataset_run.rb +104 -0
  68. data/app/models/observ/dataset_run_item.rb +111 -0
  69. data/app/models/observ/generation.rb +56 -0
  70. data/app/models/observ/null_prompt.rb +59 -0
  71. data/app/models/observ/observation.rb +38 -0
  72. data/app/models/observ/prompt.rb +315 -0
  73. data/app/models/observ/score.rb +51 -0
  74. data/app/models/observ/session.rb +131 -0
  75. data/app/models/observ/span.rb +13 -0
  76. data/app/models/observ/trace.rb +135 -0
  77. data/app/presenters/observ/agent_select_presenter.rb +59 -0
  78. data/app/services/observ/agent_executor_service.rb +174 -0
  79. data/app/services/observ/agent_provider.rb +60 -0
  80. data/app/services/observ/agent_selection_service.rb +53 -0
  81. data/app/services/observ/chat_instrumenter.rb +523 -0
  82. data/app/services/observ/dataset_runner_service.rb +153 -0
  83. data/app/services/observ/evaluator_runner_service.rb +58 -0
  84. data/app/services/observ/evaluators/base_evaluator.rb +51 -0
  85. data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
  86. data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
  87. data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
  88. data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
  89. data/app/services/observ/prompt_manager/caching.rb +167 -0
  90. data/app/services/observ/prompt_manager/comparison.rb +49 -0
  91. data/app/services/observ/prompt_manager/version_management.rb +96 -0
  92. data/app/services/observ/prompt_manager.rb +40 -0
  93. data/app/services/observ/trace_text_formatter.rb +349 -0
  94. data/app/validators/observ/prompt_config_validator.rb +187 -0
  95. data/app/views/kaminari/_first_page.html.erb +11 -0
  96. data/app/views/kaminari/_gap.html.erb +8 -0
  97. data/app/views/kaminari/_last_page.html.erb +11 -0
  98. data/app/views/kaminari/_next_page.html.erb +11 -0
  99. data/app/views/kaminari/_page.html.erb +12 -0
  100. data/app/views/kaminari/_paginator.html.erb +25 -0
  101. data/app/views/kaminari/_prev_page.html.erb +11 -0
  102. data/app/views/kaminari/observ/_first_page.html.erb +11 -0
  103. data/app/views/kaminari/observ/_gap.html.erb +8 -0
  104. data/app/views/kaminari/observ/_last_page.html.erb +11 -0
  105. data/app/views/kaminari/observ/_next_page.html.erb +11 -0
  106. data/app/views/kaminari/observ/_page.html.erb +12 -0
  107. data/app/views/kaminari/observ/_paginator.html.erb +25 -0
  108. data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
  109. data/app/views/layouts/observ/application.html.erb +88 -0
  110. data/app/views/observ/annotations/_annotation.html.erb +13 -0
  111. data/app/views/observ/annotations/_form.html.erb +28 -0
  112. data/app/views/observ/annotations/index.html.erb +28 -0
  113. data/app/views/observ/annotations/sessions_index.html.erb +48 -0
  114. data/app/views/observ/annotations/traces_index.html.erb +48 -0
  115. data/app/views/observ/chats/_form.html.erb +45 -0
  116. data/app/views/observ/chats/index.html.erb +67 -0
  117. data/app/views/observ/chats/new.html.erb +17 -0
  118. data/app/views/observ/chats/show.html.erb +34 -0
  119. data/app/views/observ/dashboard/index.html.erb +236 -0
  120. data/app/views/observ/dataset_items/_form.html.erb +49 -0
  121. data/app/views/observ/dataset_items/edit.html.erb +18 -0
  122. data/app/views/observ/dataset_items/index.html.erb +95 -0
  123. data/app/views/observ/dataset_items/new.html.erb +18 -0
  124. data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
  125. data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
  126. data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
  127. data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
  128. data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
  129. data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
  130. data/app/views/observ/dataset_runs/index.html.erb +108 -0
  131. data/app/views/observ/dataset_runs/new.html.erb +57 -0
  132. data/app/views/observ/dataset_runs/review.html.erb +155 -0
  133. data/app/views/observ/dataset_runs/show.html.erb +166 -0
  134. data/app/views/observ/datasets/_form.html.erb +62 -0
  135. data/app/views/observ/datasets/_items_tab.html.erb +66 -0
  136. data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
  137. data/app/views/observ/datasets/edit.html.erb +32 -0
  138. data/app/views/observ/datasets/index.html.erb +105 -0
  139. data/app/views/observ/datasets/new.html.erb +18 -0
  140. data/app/views/observ/datasets/show.html.erb +67 -0
  141. data/app/views/observ/messages/_content.html.erb +1 -0
  142. data/app/views/observ/messages/_form.html.erb +33 -0
  143. data/app/views/observ/messages/_message.html.erb +14 -0
  144. data/app/views/observ/messages/_tool_calls.html.erb +10 -0
  145. data/app/views/observ/messages/create.turbo_stream.erb +9 -0
  146. data/app/views/observ/observations/index.html.erb +97 -0
  147. data/app/views/observ/observations/show_generation.html.erb +195 -0
  148. data/app/views/observ/observations/show_span.html.erb +93 -0
  149. data/app/views/observ/prompts/_diff_content.html.erb +16 -0
  150. data/app/views/observ/prompts/_form.html.erb +111 -0
  151. data/app/views/observ/prompts/_new_form.html.erb +102 -0
  152. data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
  153. data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
  154. data/app/views/observ/prompts/_version_actions.html.erb +40 -0
  155. data/app/views/observ/prompts/compare.html.erb +155 -0
  156. data/app/views/observ/prompts/edit.html.erb +17 -0
  157. data/app/views/observ/prompts/index.html.erb +108 -0
  158. data/app/views/observ/prompts/new.html.erb +17 -0
  159. data/app/views/observ/prompts/show.html.erb +138 -0
  160. data/app/views/observ/prompts/versions.html.erb +87 -0
  161. data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
  162. data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
  163. data/app/views/observ/sessions/index.html.erb +91 -0
  164. data/app/views/observ/sessions/show.html.erb +251 -0
  165. data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
  166. data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
  167. data/app/views/observ/traces/index.html.erb +87 -0
  168. data/app/views/observ/traces/show.html.erb +285 -0
  169. data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
  170. data/app/views/shared/_drawer.html.erb +26 -0
  171. data/config/routes.rb +80 -0
  172. data/db/migrate/001_create_observ_sessions.rb +21 -0
  173. data/db/migrate/002_create_observ_traces.rb +25 -0
  174. data/db/migrate/003_create_observ_observations.rb +42 -0
  175. data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
  176. data/db/migrate/005_create_observ_prompts.rb +21 -0
  177. data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
  178. data/db/migrate/007_create_observ_annotations.rb +12 -0
  179. data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
  180. data/db/migrate/010_create_observ_datasets.rb +15 -0
  181. data/db/migrate/011_create_observ_dataset_items.rb +17 -0
  182. data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
  183. data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
  184. data/db/migrate/014_create_observ_scores.rb +26 -0
  185. data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
  186. data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
  187. data/lib/generators/observ/install/USAGE +27 -0
  188. data/lib/generators/observ/install/install_generator.rb +270 -0
  189. data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
  190. data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
  191. data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
  192. data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
  193. data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
  194. data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
  195. data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
  196. data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
  197. data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
  198. data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
  199. data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
  200. data/lib/observ/asset_installer.rb +130 -0
  201. data/lib/observ/asset_syncer.rb +104 -0
  202. data/lib/observ/configuration.rb +108 -0
  203. data/lib/observ/engine.rb +50 -0
  204. data/lib/observ/index_file_generator.rb +142 -0
  205. data/lib/observ/instrumenter/ruby_llm.rb +6 -0
  206. data/lib/observ/version.rb +3 -0
  207. data/lib/observ.rb +29 -0
  208. data/lib/tasks/observ_tasks.rake +75 -0
  209. metadata +453 -0
@@ -0,0 +1,124 @@
module Observ
  # Concern that ties a chat-like ActiveRecord model to an Observ::Session and
  # to an Observ::ChatInstrumenter.
  #
  # The including model must provide an +observability_session_id+ attribute,
  # an +agent_class_name+ attribute, and the +ask+ / +complete+ methods that
  # the +*_with_observability+ wrappers delegate to.
  # NOTE(review): +ask+ / +complete+ presumably come from RubyLLM's chat
  # integration - confirm against the host model.
  module ObservabilityInstrumentation
    extend ActiveSupport::Concern

    included do
      belongs_to :observ_session,
                 class_name: "Observ::Session",
                 foreign_key: :observability_session_id,
                 primary_key: :session_id,
                 optional: true

      after_create :initialize_observability_session
      after_find :ensure_instrumented_if_needed

      attr_accessor :instrumenter
    end

    # Calls +ask+ after making sure the chat has an instrumenter attached.
    #
    # @param message [Object] forwarded unchanged to +ask+
    # @param options [Hash] keyword options forwarded unchanged to +ask+
    # @return [Object] whatever +ask+ returns
    def ask_with_observability(message, **options)
      ensure_instrumented!
      ask(message, **options)
    end

    # Calls +complete+ after making sure the chat has an instrumenter attached.
    #
    # @yield forwarded unchanged to +complete+
    # @return [Object] whatever +complete+ returns
    def complete_with_observability(&block)
      ensure_instrumented!
      complete(&block)
    end

    # Pushes +new_context+ into the session metadata and into the live
    # instrumenter's context. Does nothing unless both a session and an
    # instrumenter are present.
    #
    # @param new_context [Hash] entries to merge
    # @return [Hash, nil] the instrumenter context after merging, or nil
    def update_observability_context(new_context)
      return unless observ_session && @instrumenter

      observ_session.update_metadata(new_context)

      # The instrumenter exposes no writer for its context here, so the ivar is
      # read directly; tolerate an instrumenter that has not set it.
      context = @instrumenter.instance_variable_get(:@context)
      context&.merge!(new_context)
    end

    # Finalizes the associated session (if any) and logs its id.
    #
    # @return [void]
    def finalize_observability_session
      return unless observ_session

      observ_session.finalize
      Rails.logger.info "[Observability] Session finalized: #{observ_session.session_id}"
    end

    private

    # Hook method for building observability session metadata.
    # Override this in your model or concerns to add custom metadata.
    #
    # @return [Hash] metadata hash
    def observability_metadata
      {
        agent_type: agent_class_name || "standard",
        chat_id: id
      }
    end

    # Hook method for building observability context.
    # Override this in your model or concerns to add custom context.
    #
    # @return [Hash] context hash
    def observability_context
      context = {
        agent_type: agent_class_name || "standard",
        chat_id: id
      }

      # Include agent_class if available for prompt metadata extraction
      context[:agent_class] = agent_class if respond_to?(:agent_class)

      # Include prompt version override if specified
      if respond_to?(:prompt_version) && prompt_version.present?
        context[:prompt_version_override] = prompt_version
      end

      context
    end

    # after_create hook: creates the Observ::Session for this chat, stores its
    # session_id on the record and optionally instruments the chat right away.
    # Failures are logged and swallowed so that observability problems never
    # abort the creation of the chat itself.
    def initialize_observability_session
      return unless Rails.configuration.observability.enabled

      session = Observ::Session.create!(
        user_id: "chat_#{id}",
        metadata: observability_metadata
      )

      update_column(:observability_session_id, session.session_id)

      instrument_rubyllm_chat if Rails.configuration.observability.auto_instrument_chats
    rescue StandardError => e
      Rails.logger.error "[Observability] Failed to initialize session: #{e.message}"
    end

    # Builds and activates the instrumenter once per instance. Requires a
    # session; failures are logged and swallowed.
    def instrument_rubyllm_chat
      return unless observ_session
      return if @instrumenter

      @instrumenter = Observ::ChatInstrumenter.new(
        observ_session,
        self,
        context: observability_context
      )
      @instrumenter.instrument!
    rescue StandardError => e
      Rails.logger.error "[Observability] Failed to instrument chat: #{e.message}"
    end

    # Attaches an instrumenter if none is present, looking the session up by
    # id first when the association did not resolve it.
    def ensure_instrumented!
      return if @instrumenter

      reload_observ_session if observability_session_id && !observ_session
      instrument_rubyllm_chat if observ_session
    end

    # Re-resolves the session association from the stored session id.
    def reload_observ_session
      self.observ_session = Observ::Session.find_by(session_id: observability_session_id)
    end

    # after_find hook: instruments records that already have a session.
    def ensure_instrumented_if_needed
      return unless Rails.configuration.observability.enabled
      return if @instrumenter
      # Records loaded with a partial select (e.g. select(:id)) do not carry
      # the session id column; reading it would raise and spam the error log.
      return unless has_attribute?(:observability_session_id) && observability_session_id

      ensure_instrumented!
    rescue StandardError => e
      Rails.logger.error "[Observability] Failed to auto-instrument on find: #{e.message}"
    end
  end
end
@@ -0,0 +1,320 @@
# frozen_string_literal: true

module Observ
  # Concern for agents that want to use the prompt management system.
  # Provides functionality to fetch prompts from the database with fallback support,
  # variable interpolation, and model configuration from prompt metadata.
  #
  # Usage:
  #   class MyAgent < BaseAgent
  #     include Observ::PromptManagement
  #
  #     FALLBACK_PROMPT = "You are a helpful assistant."
  #
  #     use_prompt_management(
  #       prompt_name: "my-agent-system-prompt",
  #       fallback: FALLBACK_PROMPT
  #     )
  #
  #     def self.default_model
  #       "gpt-4.1-nano"
  #     end
  #   end
  module PromptManagement
    extend ActiveSupport::Concern

    # Keys of a prompt's config hash that are forwarded as LLM parameters.
    LLM_PARAMETER_KEYS = %w[
      temperature
      max_tokens
      top_p
      frequency_penalty
      presence_penalty
      stop
      response_format
      seed
    ].freeze

    # Subset of LLM_PARAMETER_KEYS whose values are numbers. Only these are
    # coerced from numeric strings; "stop" and "response_format" are passed
    # through untouched so a stop sequence such as "42" stays a String.
    NUMERIC_LLM_PARAMETER_KEYS = %w[
      temperature
      max_tokens
      top_p
      frequency_penalty
      presence_penalty
      seed
    ].freeze

    included do
      class_attribute :prompt_config, default: {}
      # Cache the fetched prompt template object for metadata access
      class_attribute :cached_prompt_template, default: nil
      # Instance variable for version override (set per instance, not shared across class)
      attr_accessor :prompt_version_override
    end

    class_methods do
      # Enable/disable prompt management per agent.
      #
      # @param enabled [Boolean] whether this agent uses managed prompts
      # @param prompt_name [String, nil] prompt to look up; defaults to {#default_prompt_name}
      # @param fallback [String, nil] text used when no prompt is available;
      #   defaults to {#default_fallback_prompt}
      # @return [Hash] the stored configuration
      def use_prompt_management(enabled: true, prompt_name: nil, fallback: nil)
        self.prompt_config = {
          enabled: enabled,
          prompt_name: prompt_name || default_prompt_name,
          fallback: fallback || default_fallback_prompt
        }
      end

      # Default prompt name based on agent class name.
      # Example: ResearchAgent => 'research-agent-system-prompt'
      #
      # @return [String]
      def default_prompt_name
        name.underscore.tr("_/", "--") + "-system-prompt"
      end

      # Override in subclasses if needed.
      #
      # @return [String]
      def default_fallback_prompt
        "You are a helpful AI assistant."
      end

      # Check if prompt management is enabled for this agent: the global
      # Observ setting must be on and the agent must not have opted out.
      #
      # @return [Boolean]
      def prompt_management_enabled?
        Observ.config.prompt_management_enabled &&
          prompt_config[:enabled] != false
      end

      # Fetch the prompt text, falling back to the configured fallback when
      # prompt management is disabled or the lookup raises.
      #
      # A version stored in Thread.current[:observ_prompt_version_override]
      # takes precedence over the +version+ argument; without either, the
      # production prompt is requested.
      #
      # @param variables [Hash] values to compile into the template
      # @param version [Object, nil] explicit prompt version to fetch
      # @return [String] the compiled prompt, the raw prompt, or the fallback
      def fetch_prompt(variables: {}, version: nil)
        prompt_name, fallback = prompt_lookup_settings

        return fallback unless prompt_management_enabled?

        # Monotonic clock: the duration must not be affected by wall-clock jumps.
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

        begin
          version_to_use = effective_prompt_version(version)
          prompt_template = load_prompt_template(prompt_name, fallback, version_to_use)

          # Cache the template for metadata access
          @_prompt_template = prompt_template

          duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round(2)
          if prompt_template.version
            version_info =
              if version_to_use.present?
                "version: #{prompt_template.version}"
              else
                "production, version: #{prompt_template.version}"
              end
            Rails.logger.info(
              "Prompt fetched for #{name}: #{prompt_name} " \
              "(#{version_info}, duration: #{duration_ms}ms)"
            )
          else
            Rails.logger.info(
              "Using fallback prompt for #{name}: prompt '#{prompt_name}' not found " \
              "(duration: #{duration_ms}ms)"
            )
          end

          # Compile with variables (works for both Prompt and NullPrompt)
          variables.any? ? prompt_template.compile(variables) : prompt_template.prompt
        rescue => e
          Rails.logger.error(
            "Failed to fetch prompt for #{name}: #{e.message}\n" \
            "#{Array(e.backtrace).first(5).join("\n")}"
          )
          # Clear cached template on error
          @_prompt_template = nil
          fallback
        end
      end

      # Override this in subclasses to provide dynamic variables.
      #
      # @return [Hash] variables passed to the prompt template
      def prompt_variables
        {
          current_date: Time.current.strftime("%B %d, %Y"),
          current_time: Time.current.strftime("%I:%M %p %Z")
        }
      end

      # Override system_prompt to use prompt management.
      # The result is memoized in a class-level instance variable, so
      # {#prompt_variables} is evaluated only until the first successful
      # assignment; call {#reset_prompt_cache!} to force a refetch.
      #
      # @return [String]
      def system_prompt
        @_system_prompt ||= fetch_prompt(variables: prompt_variables)
      end

      # Get the template object stored by the last {#fetch_prompt} call.
      #
      # @return [Object, nil]
      def current_prompt_template
        @_prompt_template
      end

      # Get prompt metadata for observability.
      #
      # @return [Hash] Hash with :prompt_name and :prompt_version keys
      #   (nil values omitted); empty when no template has been fetched
      def prompt_metadata
        template = current_prompt_template
        return {} unless template

        {
          prompt_name: template.respond_to?(:name) ? template.name : nil,
          prompt_version: template.respond_to?(:version) ? template.version : nil
        }.compact
      end

      # Clear cached prompt (useful for tests or when prompt is updated).
      #
      # @return [nil]
      def reset_prompt_cache!
        @_system_prompt = nil
        @_prompt_template = nil
      end

      # Override model to check prompt metadata first.
      # Priority:
      # 1. Prompt metadata (config['model']) - if prompt management is enabled
      # 2. Agent's default_model - fallback
      #
      # @return [Object] the model identifier to use
      def model
        if prompt_management_enabled?
          model_from_prompt = fetch_model_from_prompt
          return model_from_prompt if model_from_prompt.present?
        end

        default_model
      end

      # Fetch model from prompt metadata.
      #
      # @param version [Object, nil] explicit prompt version to inspect
      # @return [String, nil] The model from prompt config, or nil
      def fetch_model_from_prompt(version: nil)
        return nil unless prompt_management_enabled?

        prompt_name, fallback = prompt_lookup_settings

        begin
          prompt_template = load_prompt_template(prompt_name, fallback, effective_prompt_version(version))

          # Only templates that expose a Hash config can carry a model
          prompt_template.config["model"] if template_with_config?(prompt_template)
        rescue => e
          Rails.logger.debug(
            "Could not fetch model from prompt #{prompt_name}: #{e.message}"
          )
          nil
        end
      end

      # Override model_parameters to check prompt metadata first.
      # Priority:
      # 1. Prompt metadata (config['temperature'], config['max_tokens'], etc.)
      # 2. Agent's default_model_parameters - fallback
      #
      # @return [Hash] The model parameters to use
      def model_parameters
        if prompt_management_enabled?
          params_from_prompt = fetch_model_parameters_from_prompt
          return params_from_prompt if params_from_prompt.present?
        end

        default_model_parameters
      end

      # Fetch model parameters from prompt metadata.
      #
      # @param version [Object, nil] explicit prompt version to inspect
      # @return [Hash] The model parameters from prompt config ({} when
      #   unavailable or on error)
      def fetch_model_parameters_from_prompt(version: nil)
        return {} unless prompt_management_enabled?

        prompt_name, fallback = prompt_lookup_settings

        begin
          prompt_template = load_prompt_template(prompt_name, fallback, effective_prompt_version(version))

          if template_with_config?(prompt_template)
            extract_llm_parameters(prompt_template.config)
          else
            {}
          end
        rescue => e
          Rails.logger.debug(
            "Could not fetch parameters from prompt #{prompt_name}: #{e.message}"
          )
          {}
        end
      end

      private

      # Resolve the prompt name and fallback text, applying defaults when
      # prompt_config was never initialized.
      #
      # @return [Array(String, String)] [prompt_name, fallback]
      def prompt_lookup_settings
        config = prompt_config.presence || {}
        [
          config[:prompt_name] || default_prompt_name,
          config[:fallback] || default_fallback_prompt
        ]
      end

      # The thread-local override wins over an explicitly passed version.
      #
      # @param version [Object, nil]
      # @return [Object, nil]
      def effective_prompt_version(version)
        Thread.current[:observ_prompt_version_override] || version
      end

      # Ask the PromptManager for a specific version when one is given,
      # otherwise for the production prompt.
      #
      # @return [Object] the template returned by Observ::PromptManager.fetch
      def load_prompt_template(prompt_name, fallback, version_to_use)
        if version_to_use.present?
          Observ::PromptManager.fetch(name: prompt_name, version: version_to_use, fallback: fallback)
        else
          Observ::PromptManager.fetch(name: prompt_name, state: :production, fallback: fallback)
        end
      end

      # @return [Boolean] true when the template exposes a Hash config
      def template_with_config?(prompt_template)
        prompt_template.respond_to?(:config) && prompt_template.config.is_a?(Hash)
      end

      # Extract LLM parameters from config hash.
      #
      # @param config [Hash] The prompt config (string keys)
      # @return [Hash] Extracted parameters keyed by symbol, nil values removed
      def extract_llm_parameters(config)
        config.slice(*LLM_PARAMETER_KEYS).compact.to_h do |key, value|
          numeric = NUMERIC_LLM_PARAMETER_KEYS.include?(key.to_s)
          [key.to_sym, numeric ? convert_to_numeric_if_needed(value) : value]
        end
      end

      # Convert string numbers to proper numeric types.
      #
      # @param value [Object] The value to convert
      # @return [Object] Converted value (or original if not a numeric string)
      def convert_to_numeric_if_needed(value)
        return value unless value.is_a?(String)

        if value.match?(/\A-?\d+\.\d+\z/)
          value.to_f
        elsif value.match?(/\A-?\d+\z/)
          value.to_i
        else
          value
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
module Observ
  # Concern that declares a +traces+ association on the including model.
  module TraceAssociation
    extend ActiveSupport::Concern

    included do
      # Associated records are Observ::Trace rows; the association is declared
      # with dependent: :nullify.
      has_many(
        :traces,
        class_name: "Observ::Trace",
        dependent: :nullify
      )
    end
  end
end
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module Observ
  # Free-text note attached to any record through the polymorphic
  # +annotatable+ association. Requires +content+; +tags+ is stored as JSON.
  class Annotation < ApplicationRecord
    belongs_to :annotatable, polymorphic: true

    validates :content, presence: true

    # Serialize tags as JSON for SQLite compatibility
    serialize :tags, coder: JSON

    # Newest annotations first.
    scope :recent, -> { order(created_at: :desc) }

    # Default tags to an empty array when nothing is stored
    after_initialize :ensure_tags_array

    private

    # Replaces a nil +tags+ value with an empty array.
    def ensure_tags_array
      # after_initialize also runs for records loaded with a partial select
      # (e.g. select(:id)); touching an unloaded attribute would raise.
      return unless has_attribute?(:tags)

      self.tags ||= []
    end
  end
end
@@ -0,0 +1,5 @@
module Observ
  # Abstract base class for models in the Observ namespace; it is not backed
  # by a table of its own.
  class ApplicationRecord < ActiveRecord::Base
    self.abstract_class = true
  end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true

module Observ
  # A named collection of evaluation items bound to one agent class, together
  # with the runs executed against it.
  class Dataset < ApplicationRecord
    self.table_name = "observ_datasets"

    has_many :items,
             class_name: "Observ::DatasetItem",
             foreign_key: :dataset_id,
             dependent: :destroy,
             inverse_of: :dataset
    has_many :runs,
             class_name: "Observ::DatasetRun",
             foreign_key: :dataset_id,
             dependent: :destroy,
             inverse_of: :dataset

    validates :name, presence: true, uniqueness: true
    validates :agent_class, presence: true
    validate :agent_class_exists, if: -> { agent_class.present? }

    # Resolves the stored class name to its constant.
    #
    # @return [Class, Module] the constant named by +agent_class+
    # @raise [NameError] when the name does not resolve
    def agent
      agent_class.constantize
    end

    # Items with the +active+ scope applied, i.e. the ones used when running
    # evaluations.
    def active_items
      items.active
    end

    # --- Count helpers for UI ---

    # @return [Integer] number of items in the dataset
    def items_count
      items.count
    end

    # @return [Integer] number of active items in the dataset
    def active_items_count
      items.active.count
    end

    # @return [Integer] number of runs recorded for the dataset
    def runs_count
      runs.count
    end

    # @return [Observ::DatasetRun, nil] the most recently created run
    def last_run
      newest_first = runs.order(created_at: :desc)
      newest_first.first
    end

    private

    # Validation: +agent_class+ must name a constant that can be resolved.
    def agent_class_exists
      begin
        agent_class.constantize
      rescue NameError
        errors.add(:agent_class, "must be a valid agent class")
      end
    end
  end
end
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module Observ
  # A single input / expected-output pair belonging to a dataset, optionally
  # linked to the trace it was created from.
  class DatasetItem < ApplicationRecord
    self.table_name = "observ_dataset_items"

    belongs_to :dataset, class_name: "Observ::Dataset", inverse_of: :items
    belongs_to :source_trace, class_name: "Observ::Trace", optional: true
    has_many :run_items, class_name: "Observ::DatasetRunItem",
                         foreign_key: :dataset_item_id, dependent: :destroy, inverse_of: :dataset_item

    enum :status, { active: 0, archived: 1 }

    validates :input, presence: true

    scope :active, -> { where(status: :active) }
    scope :archived, -> { where(status: :archived) }

    # Short rendering of +input+ for UI display.
    #
    # @param max_length [Integer] characters kept before "..." is appended
    # @return [String, nil] nil when +input+ is blank
    def input_preview(max_length: 100)
      preview_of(input, max_length)
    end

    # Short rendering of +expected_output+ for UI display.
    #
    # @param max_length [Integer] characters kept before "..." is appended
    # @return [String, nil] nil when +expected_output+ is blank
    def expected_output_preview(max_length: 100)
      preview_of(expected_output, max_length)
    end

    # @return [Integer] how many run items exist for this item
    def run_count
      run_items.count
    end

    # @return [Observ::DatasetRunItem, nil] the most recently created run item
    def last_run_item
      run_items.order(created_at: :desc).first
    end

    private

    # Shared implementation of the preview helpers.
    # Hashes and Arrays are rendered as JSON (Array#to_s would produce Ruby
    # inspect syntax instead); anything else uses +to_s+.
    #
    # @param value [Object] the payload to render
    # @param max_length [Integer] truncation threshold
    # @return [String, nil]
    def preview_of(value, max_length)
      return nil if value.blank?

      text =
        case value
        when Hash, Array then value.to_json
        else value.to_s
        end

      text.length > max_length ? "#{text[0...max_length]}..." : text
    end
  end
end
@@ -0,0 +1,104 @@
# frozen_string_literal: true

module Observ
  # One execution of a dataset: tracks per-item results, aggregate cost/token
  # metrics and the scores attached to its run items.
  class DatasetRun < ApplicationRecord
    self.table_name = "observ_dataset_runs"

    belongs_to :dataset, class_name: "Observ::Dataset", inverse_of: :runs
    has_many :run_items, class_name: "Observ::DatasetRunItem",
                         foreign_key: :dataset_run_id, dependent: :destroy, inverse_of: :dataset_run
    has_many :items, through: :run_items, source: :dataset_item
    has_many :scores, through: :run_items

    enum :status, { pending: 0, running: 1, completed: 2, failed: 3 }

    validates :name, presence: true, uniqueness: { scope: :dataset_id }

    # Share of items that have finished (completed or failed).
    #
    # @return [Numeric] percentage rounded to one decimal; 0 when there are no items
    def progress_percentage
      return 0 if total_items.zero?

      ((completed_items + failed_items).to_f / total_items * 100).round(1)
    end

    # @return [Boolean] true once the run is completed or failed
    def finished?
      completed? || failed?
    end

    # @return [Boolean] true while the run is pending or running
    def in_progress?
      pending? || running?
    end

    # Recompute the aggregate counters and cost/token totals from the run
    # items and their traces, then persist them.
    #
    # @raise [ActiveRecord::RecordInvalid] when the update fails validation
    def update_metrics!
      completed = run_items.where.not(trace_id: nil).where(error: nil).count
      failed = run_items.where.not(error: nil).count

      # Calculate cost and tokens from associated traces
      trace_ids = run_items.where.not(trace_id: nil).pluck(:trace_id)
      traces = Observ::Trace.where(id: trace_ids)

      # sum already yields 0 for an empty relation, so no nil fallback is needed
      update!(
        completed_items: completed,
        failed_items: failed,
        total_cost: traces.sum(:total_cost),
        total_tokens: traces.sum(:total_tokens)
      )
    end

    # Create (idempotently) one run item per active dataset item and store
    # the resulting item count.
    def initialize_run_items!
      dataset.active_items.find_each do |item|
        run_items.find_or_create_by!(dataset_item: item)
      end
      update!(total_items: run_items.count)
    end

    # --- Summary helpers for UI ---

    # @return [Numeric] percentage of items completed; 0 when there are no items
    def success_rate
      return 0 if total_items.zero?

      (completed_items.to_f / total_items * 100).round(1)
    end

    # @return [Numeric] percentage of items failed; 0 when there are no items
    def failure_rate
      return 0 if total_items.zero?

      (failed_items.to_f / total_items * 100).round(1)
    end

    # @return [Integer] items that are neither completed nor failed
    def pending_items_count
      total_items - completed_items - failed_items
    end

    # Time between the creation of the earliest run item and the latest
    # update of any run item.
    #
    # @return [Float, nil] seconds rounded to one decimal; nil unless the run
    #   is finished and has run items
    def duration_seconds
      return nil unless finished? && run_items.any?

      first_item = run_items.order(created_at: :asc).first
      last_item = run_items.order(updated_at: :desc).first
      (last_item.updated_at - first_item.created_at).round(1)
    end

    # --- Score aggregation ---

    # Average value of the scores called +name+.
    #
    # @param name [String] score name
    # @return [Numeric, nil] average rounded to 4 decimals; nil without scores
    def average_score(name)
      # average returns nil when no rows match, so no separate emptiness
      # query is required
      scores.where(name: name).average(:value)&.round(4)
    end

    # Average value per score name.
    #
    # @return [Hash] score name => average rounded to 4 decimals
    def score_summary
      # An average can be nil (e.g. only NULL values); mirror average_score
      # and leave it nil instead of raising.
      scores.group(:name).average(:value).transform_values { |avg| avg&.round(4) }
    end

    # Percentage of scores with a value of at least 0.5.
    #
    # @param score_name [String, nil] restrict to one score name when given
    # @return [Float, nil] percentage rounded to one decimal; nil without scores
    def pass_rate(score_name = nil)
      scope = scores
      scope = scope.where(name: score_name) if score_name
      return nil if scope.empty?

      (scope.where("value >= 0.5").count.to_f / scope.count * 100).round(1)
    end

    # @return [Integer] run items that have at least one score
    def items_with_scores_count
      run_items.joins(:scores).distinct.count
    end

    # @return [Integer] total items minus those with at least one score
    def items_without_scores_count
      total_items - items_with_scores_count
    end
  end
end