rubyllm-observ 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +778 -0
  3. data/Rakefile +49 -0
  4. data/app/assets/javascripts/observ/application.js +12 -0
  5. data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
  6. data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
  7. data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
  8. data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
  9. data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
  10. data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
  11. data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
  12. data/app/assets/javascripts/observ/controllers/index.js +52 -0
  13. data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
  14. data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
  15. data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
  16. data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
  17. data/app/assets/stylesheets/observ/_annotations.scss +127 -0
  18. data/app/assets/stylesheets/observ/_card.scss +52 -0
  19. data/app/assets/stylesheets/observ/_chat.scss +156 -0
  20. data/app/assets/stylesheets/observ/_components.scss +460 -0
  21. data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
  22. data/app/assets/stylesheets/observ/_datasets.scss +697 -0
  23. data/app/assets/stylesheets/observ/_drawer.scss +273 -0
  24. data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
  25. data/app/assets/stylesheets/observ/_layout.scss +256 -0
  26. data/app/assets/stylesheets/observ/_metrics.scss +99 -0
  27. data/app/assets/stylesheets/observ/_observations.scss +160 -0
  28. data/app/assets/stylesheets/observ/_pagination.scss +143 -0
  29. data/app/assets/stylesheets/observ/_prompts.scss +365 -0
  30. data/app/assets/stylesheets/observ/_table.scss +53 -0
  31. data/app/assets/stylesheets/observ/_variables.scss +53 -0
  32. data/app/assets/stylesheets/observ/application.scss +15 -0
  33. data/app/controllers/observ/annotations_controller.rb +144 -0
  34. data/app/controllers/observ/application_controller.rb +8 -0
  35. data/app/controllers/observ/chats_controller.rb +58 -0
  36. data/app/controllers/observ/dashboard_controller.rb +159 -0
  37. data/app/controllers/observ/dataset_items_controller.rb +85 -0
  38. data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
  39. data/app/controllers/observ/dataset_runs_controller.rb +110 -0
  40. data/app/controllers/observ/datasets_controller.rb +74 -0
  41. data/app/controllers/observ/messages_controller.rb +26 -0
  42. data/app/controllers/observ/observations_controller.rb +59 -0
  43. data/app/controllers/observ/prompt_versions_controller.rb +148 -0
  44. data/app/controllers/observ/prompts_controller.rb +205 -0
  45. data/app/controllers/observ/sessions_controller.rb +45 -0
  46. data/app/controllers/observ/traces_controller.rb +86 -0
  47. data/app/forms/observ/prompt_form.rb +96 -0
  48. data/app/helpers/observ/application_helper.rb +9 -0
  49. data/app/helpers/observ/chats_helper.rb +47 -0
  50. data/app/helpers/observ/dashboard_helper.rb +154 -0
  51. data/app/helpers/observ/datasets_helper.rb +62 -0
  52. data/app/helpers/observ/pagination_helper.rb +38 -0
  53. data/app/jobs/observ/application_job.rb +4 -0
  54. data/app/jobs/observ/dataset_runner_job.rb +49 -0
  55. data/app/mailers/observ/application_mailer.rb +6 -0
  56. data/app/models/concerns/observ/agent_phaseable.rb +124 -0
  57. data/app/models/concerns/observ/agent_selectable.rb +50 -0
  58. data/app/models/concerns/observ/chat_enhancements.rb +109 -0
  59. data/app/models/concerns/observ/message_enhancements.rb +31 -0
  60. data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
  61. data/app/models/concerns/observ/prompt_management.rb +320 -0
  62. data/app/models/concerns/observ/trace_association.rb +9 -0
  63. data/app/models/observ/annotation.rb +23 -0
  64. data/app/models/observ/application_record.rb +5 -0
  65. data/app/models/observ/dataset.rb +51 -0
  66. data/app/models/observ/dataset_item.rb +41 -0
  67. data/app/models/observ/dataset_run.rb +104 -0
  68. data/app/models/observ/dataset_run_item.rb +111 -0
  69. data/app/models/observ/generation.rb +56 -0
  70. data/app/models/observ/null_prompt.rb +59 -0
  71. data/app/models/observ/observation.rb +38 -0
  72. data/app/models/observ/prompt.rb +315 -0
  73. data/app/models/observ/score.rb +51 -0
  74. data/app/models/observ/session.rb +131 -0
  75. data/app/models/observ/span.rb +13 -0
  76. data/app/models/observ/trace.rb +135 -0
  77. data/app/presenters/observ/agent_select_presenter.rb +59 -0
  78. data/app/services/observ/agent_executor_service.rb +174 -0
  79. data/app/services/observ/agent_provider.rb +60 -0
  80. data/app/services/observ/agent_selection_service.rb +53 -0
  81. data/app/services/observ/chat_instrumenter.rb +523 -0
  82. data/app/services/observ/dataset_runner_service.rb +153 -0
  83. data/app/services/observ/evaluator_runner_service.rb +58 -0
  84. data/app/services/observ/evaluators/base_evaluator.rb +51 -0
  85. data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
  86. data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
  87. data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
  88. data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
  89. data/app/services/observ/prompt_manager/caching.rb +167 -0
  90. data/app/services/observ/prompt_manager/comparison.rb +49 -0
  91. data/app/services/observ/prompt_manager/version_management.rb +96 -0
  92. data/app/services/observ/prompt_manager.rb +40 -0
  93. data/app/services/observ/trace_text_formatter.rb +349 -0
  94. data/app/validators/observ/prompt_config_validator.rb +187 -0
  95. data/app/views/kaminari/_first_page.html.erb +11 -0
  96. data/app/views/kaminari/_gap.html.erb +8 -0
  97. data/app/views/kaminari/_last_page.html.erb +11 -0
  98. data/app/views/kaminari/_next_page.html.erb +11 -0
  99. data/app/views/kaminari/_page.html.erb +12 -0
  100. data/app/views/kaminari/_paginator.html.erb +25 -0
  101. data/app/views/kaminari/_prev_page.html.erb +11 -0
  102. data/app/views/kaminari/observ/_first_page.html.erb +11 -0
  103. data/app/views/kaminari/observ/_gap.html.erb +8 -0
  104. data/app/views/kaminari/observ/_last_page.html.erb +11 -0
  105. data/app/views/kaminari/observ/_next_page.html.erb +11 -0
  106. data/app/views/kaminari/observ/_page.html.erb +12 -0
  107. data/app/views/kaminari/observ/_paginator.html.erb +25 -0
  108. data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
  109. data/app/views/layouts/observ/application.html.erb +88 -0
  110. data/app/views/observ/annotations/_annotation.html.erb +13 -0
  111. data/app/views/observ/annotations/_form.html.erb +28 -0
  112. data/app/views/observ/annotations/index.html.erb +28 -0
  113. data/app/views/observ/annotations/sessions_index.html.erb +48 -0
  114. data/app/views/observ/annotations/traces_index.html.erb +48 -0
  115. data/app/views/observ/chats/_form.html.erb +45 -0
  116. data/app/views/observ/chats/index.html.erb +67 -0
  117. data/app/views/observ/chats/new.html.erb +17 -0
  118. data/app/views/observ/chats/show.html.erb +34 -0
  119. data/app/views/observ/dashboard/index.html.erb +236 -0
  120. data/app/views/observ/dataset_items/_form.html.erb +49 -0
  121. data/app/views/observ/dataset_items/edit.html.erb +18 -0
  122. data/app/views/observ/dataset_items/index.html.erb +95 -0
  123. data/app/views/observ/dataset_items/new.html.erb +18 -0
  124. data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
  125. data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
  126. data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
  127. data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
  128. data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
  129. data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
  130. data/app/views/observ/dataset_runs/index.html.erb +108 -0
  131. data/app/views/observ/dataset_runs/new.html.erb +57 -0
  132. data/app/views/observ/dataset_runs/review.html.erb +155 -0
  133. data/app/views/observ/dataset_runs/show.html.erb +166 -0
  134. data/app/views/observ/datasets/_form.html.erb +62 -0
  135. data/app/views/observ/datasets/_items_tab.html.erb +66 -0
  136. data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
  137. data/app/views/observ/datasets/edit.html.erb +32 -0
  138. data/app/views/observ/datasets/index.html.erb +105 -0
  139. data/app/views/observ/datasets/new.html.erb +18 -0
  140. data/app/views/observ/datasets/show.html.erb +67 -0
  141. data/app/views/observ/messages/_content.html.erb +1 -0
  142. data/app/views/observ/messages/_form.html.erb +33 -0
  143. data/app/views/observ/messages/_message.html.erb +14 -0
  144. data/app/views/observ/messages/_tool_calls.html.erb +10 -0
  145. data/app/views/observ/messages/create.turbo_stream.erb +9 -0
  146. data/app/views/observ/observations/index.html.erb +97 -0
  147. data/app/views/observ/observations/show_generation.html.erb +195 -0
  148. data/app/views/observ/observations/show_span.html.erb +93 -0
  149. data/app/views/observ/prompts/_diff_content.html.erb +16 -0
  150. data/app/views/observ/prompts/_form.html.erb +111 -0
  151. data/app/views/observ/prompts/_new_form.html.erb +102 -0
  152. data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
  153. data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
  154. data/app/views/observ/prompts/_version_actions.html.erb +40 -0
  155. data/app/views/observ/prompts/compare.html.erb +155 -0
  156. data/app/views/observ/prompts/edit.html.erb +17 -0
  157. data/app/views/observ/prompts/index.html.erb +108 -0
  158. data/app/views/observ/prompts/new.html.erb +17 -0
  159. data/app/views/observ/prompts/show.html.erb +138 -0
  160. data/app/views/observ/prompts/versions.html.erb +87 -0
  161. data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
  162. data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
  163. data/app/views/observ/sessions/index.html.erb +91 -0
  164. data/app/views/observ/sessions/show.html.erb +251 -0
  165. data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
  166. data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
  167. data/app/views/observ/traces/index.html.erb +87 -0
  168. data/app/views/observ/traces/show.html.erb +285 -0
  169. data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
  170. data/app/views/shared/_drawer.html.erb +26 -0
  171. data/config/routes.rb +80 -0
  172. data/db/migrate/001_create_observ_sessions.rb +21 -0
  173. data/db/migrate/002_create_observ_traces.rb +25 -0
  174. data/db/migrate/003_create_observ_observations.rb +42 -0
  175. data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
  176. data/db/migrate/005_create_observ_prompts.rb +21 -0
  177. data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
  178. data/db/migrate/007_create_observ_annotations.rb +12 -0
  179. data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
  180. data/db/migrate/010_create_observ_datasets.rb +15 -0
  181. data/db/migrate/011_create_observ_dataset_items.rb +17 -0
  182. data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
  183. data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
  184. data/db/migrate/014_create_observ_scores.rb +26 -0
  185. data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
  186. data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
  187. data/lib/generators/observ/install/USAGE +27 -0
  188. data/lib/generators/observ/install/install_generator.rb +270 -0
  189. data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
  190. data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
  191. data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
  192. data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
  193. data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
  194. data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
  195. data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
  196. data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
  197. data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
  198. data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
  199. data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
  200. data/lib/observ/asset_installer.rb +130 -0
  201. data/lib/observ/asset_syncer.rb +104 -0
  202. data/lib/observ/configuration.rb +108 -0
  203. data/lib/observ/engine.rb +50 -0
  204. data/lib/observ/index_file_generator.rb +142 -0
  205. data/lib/observ/instrumenter/ruby_llm.rb +6 -0
  206. data/lib/observ/version.rb +3 -0
  207. data/lib/observ.rb +29 -0
  208. data/lib/tasks/observ_tasks.rake +75 -0
  209. metadata +453 -0
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # One dataset item executed as part of a dataset run.
  #
  # Ties a run and an item to the (optional) trace and observation produced by
  # the execution, and owns the scores recorded against that execution.
  class DatasetRunItem < ApplicationRecord
    self.table_name = "observ_dataset_run_items"

    belongs_to :dataset_run, class_name: "Observ::DatasetRun", inverse_of: :run_items
    belongs_to :dataset_item, class_name: "Observ::DatasetItem", inverse_of: :run_items
    belongs_to :trace, class_name: "Observ::Trace", optional: true
    belongs_to :observation, class_name: "Observ::Observation", optional: true
    has_many :scores, class_name: "Observ::Score",
      foreign_key: :dataset_run_item_id, dependent: :destroy, inverse_of: :dataset_run_item

    # A dataset item may appear at most once per run.
    validates :dataset_run_id, uniqueness: { scope: :dataset_item_id }

    # Status scopes
    scope :succeeded, -> { where.not(trace_id: nil).where(error: nil) }
    scope :failed, -> { where.not(error: nil) }
    scope :pending, -> { where(trace_id: nil, error: nil) }

    # Status helpers

    # @return [Boolean] true when a trace is attached and no error is recorded
    def succeeded?
      trace_id.present? && error.blank?
    end

    # @return [Boolean] true when an error is recorded
    def failed?
      error.present?
    end

    # @return [Boolean] true when neither a trace nor an error has been recorded
    def pending?
      trace_id.nil? && error.nil?
    end

    # Collapses the predicates above into one symbol; a recorded error takes
    # precedence over an attached trace.
    #
    # @return [Symbol] :failed, :succeeded or :pending
    def status
      return :failed if failed?
      return :succeeded if succeeded?

      :pending
    end

    # Access helpers

    # @return the input of the associated dataset item
    def input
      dataset_item.input
    end

    # @return the expected output of the associated dataset item
    def expected_output
      dataset_item.expected_output
    end

    # @return the output of the associated trace, or nil when there is no trace
    def actual_output
      trace&.output
    end

    # Comparison helpers

    # Compares expected and actual output after normalization.
    #
    # @return [Boolean, nil] nil when either side is blank (nothing to compare),
    #   otherwise whether the normalized values are equal
    def output_matches?
      return nil if expected_output.blank? || actual_output.blank?

      normalize_for_comparison(expected_output) == normalize_for_comparison(actual_output)
    end

    # Metrics from trace (all nil when no trace is attached)

    def cost
      trace&.total_cost
    end

    def tokens
      trace&.total_tokens
    end

    def duration_ms
      trace&.duration_ms
    end

    # Score helpers

    # Most recently created score with the given name.
    #
    # @param name [String] score name
    # @param source [String, Symbol, nil] optional source filter
    # @return [Observ::Score, nil]
    def score_for(name, source: nil)
      scope = scores.where(name: name)
      scope = scope.where(source: source) if source
      scope.order(created_at: :desc).first
    end

    # @return [Boolean] true when at least one score exists
    def scored?
      scores.any?
    end

    # @return [Integer] number of scores whose value is at least 0.5
    def passing_scores_count
      scores.where("value >= 0.5").count
    end

    # @return [Integer] number of scores whose value is below 0.5
    def failing_scores_count
      scores.where("value < 0.5").count
    end

    private

    # Normalizes an output for comparison.
    #
    # Strings are parsed as JSON when possible (otherwise stripped), and hash
    # keys are symbolized at every depth so string-keyed and symbol-keyed
    # structures compare equal. Top-level arrays are normalized too, which the
    # previous Hash-only handling missed.
    def normalize_for_comparison(output)
      case output
      when Hash, Array
        deep_symbolize(output)
      when String
        begin
          deep_symbolize(JSON.parse(output))
        rescue JSON::ParserError
          output.strip
        end
      else
        output
      end
    end

    # Symbolizes hash keys inside hashes and arrays; other values pass through
    # unchanged (array elements are NOT re-parsed as JSON).
    def deep_symbolize(value)
      case value
      when Hash
        value.deep_symbolize_keys
      when Array
        value.map { |element| deep_symbolize(element) }
      else
        value
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Observation subtype with helpers for recording the input, messages, tools
  # and final result of a generation, plus token/latency accessors.
  class Generation < Observation
    # Stores the input (non-strings are serialized with #to_json) and,
    # when given, replaces the stored messages.
    #
    # @param input [String, Object]
    # @param messages [Object, nil] keeps the current messages when nil
    def set_input(input, messages: nil)
      update!(
        input: input.is_a?(String) ? input : input.to_json,
        messages: messages || self.messages
      )
    end

    # Replaces the stored messages.
    def set_messages(messages)
      update!(messages: messages)
    end

    # Stores the tool definitions and the tool choice.
    def set_tools(tools, tool_choice: nil)
      update!(tools: tools, tool_choice: tool_choice)
    end

    # Records the result of the generation and stamps end_time with the
    # current time.
    #
    # usage and provider_metadata are merged (with stringified keys) into the
    # values already stored on the record. Passing nil for either is treated
    # as an empty hash instead of raising NoMethodError.
    #
    # @param output [String, Object] non-strings are serialized with #to_json
    # @param usage [Hash, nil]
    # @param cost_usd [Numeric]
    # @param status_message [String, nil]
    # @param finish_reason [String, nil]
    # @param completion_start_time [Time, nil]
    # @param provider_metadata [Hash, nil]
    # @param messages [Object, nil] keeps the current messages when nil
    # @param raw_response [Object, nil]
    def finalize(output:, usage: {}, cost_usd: 0.0, status_message: nil, finish_reason: nil,
                 completion_start_time: nil, provider_metadata: {}, messages: nil, raw_response: nil)
      merged_usage = (self.usage || {}).merge((usage || {}).stringify_keys)
      merged_provider_metadata =
        (self.provider_metadata || {}).merge((provider_metadata || {}).stringify_keys)

      update!(
        output: output.is_a?(String) ? output : output.to_json,
        usage: merged_usage,
        cost_usd: cost_usd,
        finish_reason: finish_reason,
        completion_start_time: completion_start_time,
        provider_metadata: merged_provider_metadata,
        messages: messages || self.messages,
        raw_response: raw_response,
        end_time: Time.current,
        status_message: status_message
      )
    end

    # @return [Float, nil] milliseconds between start_time and
    #   completion_start_time, or nil when either is missing
    def time_to_first_token_ms
      return nil unless completion_start_time && start_time

      ((completion_start_time - start_time) * 1000).round(2)
    end

    # Token counts read from the usage hash; 0 when usage or the key is absent.

    def total_tokens
      usage&.dig("total_tokens") || 0
    end

    def input_tokens
      usage&.dig("input_tokens") || 0
    end

    def output_tokens
      usage&.dig("output_tokens") || 0
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Null Object pattern for Prompt.
  # Used when a prompt is not found, providing a fallback that answers the
  # same basic questions (name, prompt text, config, state predicates).
  class NullPrompt
    attr_reader :name, :prompt, :config

    # @param name [String] name of the prompt that was looked up
    # @param fallback_text [String, nil] text to use in place of the prompt
    def initialize(name:, fallback_text:)
      @name = name
      @prompt = fallback_text
      @config = {}
    end

    # Returns nil to indicate this is not a real prompt
    def version
      nil
    end

    # Returns the fallback text as-is (no variable compilation).
    # The argument is accepted for interface compatibility and ignored.
    def compile(_variables = {})
      @prompt
    end

    # Null prompts are always in a "fallback" state
    def state
      "fallback"
    end

    def draft?
      false
    end

    def production?
      false
    end

    def archived?
      false
    end

    def persisted?
      false
    end

    def id
      nil
    end

    # For logging/debugging
    def to_s
      "NullPrompt(#{name})"
    end

    # Shows the name and the leading characters of the fallback text.
    # to_s guards against a nil fallback, which previously raised
    # NoMethodError on nil[0..50].
    def inspect
      "#<Observ::NullPrompt name: #{name.inspect}, fallback: #{prompt.to_s[0..50].inspect}...>"
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Base record for observations stored in observ_observations.
  # The type column selects the concrete subclass (Observ::Generation or
  # Observ::Span).
  class Observation < ApplicationRecord
    self.table_name = "observ_observations"
    self.inheritance_column = :type

    belongs_to :trace, class_name: "Observ::Trace", foreign_key: :observ_trace_id, inverse_of: :observations

    validates :observation_id, presence: true, uniqueness: true
    validates :start_time, presence: true
    validates :type, presence: true, inclusion: { in: %w[Observ::Generation Observ::Span] }

    # Keep the trace's aggregated metrics in step whenever cost or usage changes.
    after_save :update_trace_metrics, if: :saved_change_to_cost_or_usage?

    # Marks the observation as finished at the current time.
    #
    # @param status_message [String, nil]
    def finalize(status_message: nil)
      update!(
        end_time: Time.current,
        status_message: status_message
      )
    end

    # @return [Float, nil] elapsed milliseconds between start_time and
    #   end_time, or nil when either timestamp is missing (start_time can be
    #   nil on a record that has not passed validation yet)
    def duration_ms
      return nil unless start_time && end_time

      ((end_time - start_time) * 1000).round(2)
    end

    private

    def saved_change_to_cost_or_usage?
      saved_change_to_cost_usd? || saved_change_to_usage?
    end

    def update_trace_metrics
      trace&.update_aggregated_metrics
    end
  end
end
@@ -0,0 +1,315 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mustache"
4
+
5
module Observ
  # Versioned, named prompt template with a draft -> production -> archived
  # lifecycle (AASM) and Mustache-based compilation.
  class Prompt < ApplicationRecord
    include AASM

    self.table_name = "observ_prompts"

    # Matches plain variable tags such as {{name}} or {{user.name}}, but not
    # {{#section}}, {{/section}}, {{^section}}, {{!comment}}, {{>partial}} or {{{raw}}}.
    TOP_LEVEL_VARIABLE_PATTERN = /\{\{([^#\^\/!>\{\s][^}\s]*)\}\}/
    # Matches one section: {{#name}}...{{/name}} or {{^name}}...{{/name}}.
    SECTION_PATTERN = /\{\{[#\^](\w+)\}\}.*?\{\{\/\1\}\}/m
    private_constant :TOP_LEVEL_VARIABLE_PATTERN, :SECTION_PATTERN

    # ============================================
    # VALIDATIONS
    # ============================================
    validates :name, presence: true
    validates :prompt, presence: true
    validates :version, presence: true, numericality: { only_integer: true, greater_than: 0 }
    validates :state, presence: true, inclusion: { in: %w[draft production archived] }

    # Only ONE production version per prompt name
    validate :only_one_production_per_name, if: :production?

    # Validate config format
    validate :validate_config_format

    # Prevent editing immutable prompts
    before_update :ensure_editable!, if: :content_changed?

    # Ensure config is always a Hash, not a String
    before_validation :normalize_config

    # ============================================
    # SCOPES
    # ============================================
    scope :by_name, ->(name) { where(name: name) }
    scope :latest_version, -> { order(version: :desc).limit(1) }

    # ============================================
    # AASM STATE MACHINE
    # ============================================
    aasm column: :state, after_commit: true do
      state :draft, initial: true
      state :production
      state :archived

      event :promote do
        transitions from: :draft, to: :production, after: :demote_other_production_versions
      end

      event :demote do
        transitions from: :production, to: :archived
      end

      event :restore do
        transitions from: :archived, to: :production, after: :demote_other_production_versions
      end

      # Invalidate cache after any state transition
      after_all_transitions :invalidate_cache_after_transition
    end

    # ============================================
    # CALLBACKS
    # ============================================

    # Invalidate cache after updates or deletion
    after_save :invalidate_cache_if_changed
    after_destroy :invalidate_cache_on_destroy

    # ============================================
    # CLASS METHODS
    # ============================================

    # Fetch a prompt by name and either version or state, through Rails.cache.
    #
    # Only prompts found in the database are cached. The fallback is returned
    # outside the cache block: the cache key does not include the fallback, so
    # caching it would hand one caller's fallback to every later caller of the
    # same name/state until the entry expired.
    #
    # @param name [String]
    # @param version [Integer, nil] when present, takes precedence over state
    # @param state [Symbol, String, nil] nil falls back to the configured default state
    # @param fallback [Object, nil] returned when no prompt matches
    # @raise [PromptNotFoundError] when no prompt matches and no fallback is given
    def self.fetch(name:, version: nil, state: :production, fallback: nil)
      state ||= Observ.config.prompt_default_state
      cache_key = cache_key_for(name: name, version: version, state: state)
      cache_ttl = Observ.config.prompt_cache_ttl

      Rails.cache.fetch(cache_key, expires_in: cache_ttl) do
        fetch_from_database(name: name, version: version, state: state, fallback: nil)
      end
    rescue PromptNotFoundError
      raise unless fallback

      fallback
    end

    # Looks the prompt up directly in the database.
    #
    # The state is matched with a plain where clause rather than by sending
    # the state name to the relation, so an unexpected state value can only
    # produce "not found" and never invoke an arbitrary relation method.
    #
    # @raise [PromptNotFoundError] when no prompt matches and fallback is falsy
    def self.fetch_from_database(name:, version:, state:, fallback:)
      query = where(name: name)

      prompt = if version.present?
        query.find_by(version: version)
      else
        query.where(state: state.to_s).first
      end

      return prompt if prompt
      return fallback if fallback

      raise PromptNotFoundError, "Prompt '#{name}' not found"
    end

    # Cache key for a name plus either an explicit version or a state.
    def self.cache_key_for(name:, version:, state:)
      "observ:prompt:#{name}:#{version || state}"
    end

    # Clears the state-keyed cache entries for this prompt name.
    # Entries keyed by an explicit version number are not touched here.
    def self.clear_cache(name:)
      [ :draft, :production, :archived ].each do |state|
        Rails.cache.delete(cache_key_for(name: name, version: nil, state: state))
      end
    end

    # Create new version (auto-increment) as a draft, optionally promoting it.
    #
    # @return [Observ::Prompt] the newly created version
    def self.create_version(name:, prompt:, config: {}, commit_message: nil, created_by: nil, promote_to_production: false)
      latest_version = where(name: name).maximum(:version) || 0
      new_version = latest_version + 1

      new_prompt = create!(
        name: name,
        prompt: prompt,
        version: new_version,
        config: config,
        commit_message: commit_message,
        created_by: created_by,
        state: :draft
      )

      new_prompt.promote! if promote_to_production
      new_prompt
    end

    # ============================================
    # INSTANCE METHODS
    # ============================================

    # Compile prompt with Mustache templating
    # Supports: variables {{name}}, loops {{#items}}...{{/items}},
    # conditionals {{#flag}}...{{/flag}}, inverted sections {{^items}}...{{/items}}
    def compile(variables = {})
      Mustache.render(prompt, variables)
    end

    # Compile with validation (raises if missing top-level variables)
    # Note: Variables inside sections (loops) are validated at render time by Mustache
    #
    # @raise [VariableSubstitutionError] listing the missing variable names
    def compile_with_validation(variables = {})
      # Convert all keys to strings for comparison
      provided_keys = variables.keys.map(&:to_s)

      missing_vars = required_variables.reject do |var|
        # Handle dot notation (e.g., "user.name" - check if "user" key exists)
        root_key = var.split(".").first
        provided_keys.include?(var) || provided_keys.include?(root_key)
      end

      if missing_vars.any?
        raise VariableSubstitutionError, "Missing variables: #{missing_vars.join(', ')}"
      end

      compile(variables)
    end

    # Extract top-level variables from template (for validation purposes).
    # Section content is stripped first, so only variables outside of
    # sections are reported.
    #
    # @return [Array<String>] unique variable names in order of appearance
    def required_variables
      strip_sections(prompt).scan(TOP_LEVEL_VARIABLE_PATTERN).flatten.uniq
    end

    # Immutability checks
    def editable?
      draft?
    end

    def immutable?
      production? || archived?
    end

    def can_delete?
      draft? || archived?
    end

    # Clone to new draft version
    def clone_to_draft
      self.class.create_version(
        name: name,
        prompt: prompt,
        config: config,
        commit_message: "Cloned from v#{version} (#{state})",
        created_by: nil
      )
    end

    # Version navigation
    def previous_version
      self.class.where(name: name).where("version < ?", version).order(version: :desc).first
    end

    def next_version
      self.class.where(name: name).where("version > ?", version).order(version: :asc).first
    end

    def latest_version
      self.class.where(name: name).order(version: :desc).first
    end

    # Export
    def to_json_export
      as_json(except: [ :id, :created_at, :updated_at ])
    end

    def to_yaml_export
      to_json_export.to_yaml
    end

    private

    # ============================================
    # VALIDATIONS
    # ============================================

    def only_one_production_per_name
      existing_production = self.class.where(name: name, state: :production).where.not(id: id).exists?
      if existing_production
        errors.add(:state, "Only one production version allowed per prompt name")
      end
    end

    def validate_config_format
      return if config.blank?

      validator = Observ::PromptConfigValidator.new(config)
      unless validator.valid?
        validator.errors.each do |error|
          errors.add(:config, error)
        end
      end
    end

    # Aborts an update that changes prompt text or config on a production or
    # archived record.
    def ensure_editable!
      if immutable?
        errors.add(:base, "Cannot edit #{state} prompt. Clone to draft first.")
        raise ActiveRecord::RecordInvalid, self
      end
    end

    def content_changed?
      prompt_changed? || config_changed?
    end

    # ============================================
    # CALLBACKS
    # ============================================

    # Archives every other production version of the same name.
    # update_all writes directly, without running callbacks or validations.
    def demote_other_production_versions
      self.class.where(name: name, state: :production).where.not(id: id).update_all(state: :archived)
    end

    def invalidate_cache_after_transition
      Observ::PromptManager.invalidate_cache(name: name)
      Rails.logger.info("Cache invalidated after state transition for #{name} v#{version}")
    end

    def invalidate_cache_if_changed
      return unless saved_change_to_prompt? || saved_change_to_config? || saved_change_to_state?

      Observ::PromptManager.invalidate_cache(name: name)
    end

    def invalidate_cache_on_destroy
      Observ::PromptManager.invalidate_cache(name: name)
    end

    def clear_prompt_cache
      self.class.clear_cache(name: name)
    end

    def normalize_config
      return if config.nil?

      # If config is a String, parse it to a Hash
      if config.is_a?(String)
        self.config = begin
          JSON.parse(config)
        rescue JSON::ParserError
          {} # Default to empty hash if parsing fails
        end
      end

      # Ensure it's a Hash (could be other types in edge cases)
      self.config = {} unless config.is_a?(Hash)
    end

    # Strip section content from template for top-level variable extraction
    # Removes content between {{#section}}...{{/section}} and {{^section}}...{{/section}}
    def strip_sections(template)
      result = template.dup

      # Non-greedy matching removes one level of sections per pass;
      # repeat until nothing changes so nested sections are fully removed.
      loop do
        previous = result
        result = result.gsub(SECTION_PATTERN, "")
        break if result == previous
      end

      result
    end
  end

  # Custom exceptions
  class PromptNotFoundError < StandardError; end
  class VariableSubstitutionError < StandardError; end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # A named numeric score attached to a dataset run item and its trace.
  # data_type controls how the value is displayed; source records where the
  # score came from.
  class Score < ApplicationRecord
    self.table_name = "observ_scores"

    belongs_to :dataset_run_item, class_name: "Observ::DatasetRunItem", inverse_of: :scores
    belongs_to :trace, class_name: "Observ::Trace"
    belongs_to :observation, class_name: "Observ::Observation", optional: true

    enum :data_type, { numeric: 0, boolean: 1, categorical: 2 }
    enum :source, { programmatic: 0, manual: 1, llm_judge: 2 }

    validates :name, presence: true
    validates :value, presence: true, numericality: true
    # One score per (run item, name, source) combination.
    validates :dataset_run_item_id,
              uniqueness: {
                scope: [ :name, :source ],
                message: "already has a score with this name and source"
              }

    # Convenience accessors that go through the run item
    delegate :dataset_run, :dataset_item, to: :dataset_run_item

    # @return [Boolean] true when the value is at least 0.5
    def passed?
      value >= 0.5
    end

    # @return [Boolean] the negation of #passed?
    def failed?
      !passed?
    end

    # Human-readable value: "Pass"/"Fail" for boolean scores, string_value
    # (or the raw value when that is blank) for categorical scores, and the
    # value rounded to two decimals otherwise.
    #
    # @return [String]
    def display_value
      if boolean?
        return passed? ? "Pass" : "Fail"
      end
      return string_value.presence || value.to_s if categorical?

      value.round(2).to_s
    end

    # CSS badge modifier: pass/fail colouring for boolean scores, otherwise
    # success from 0.7, warning from 0.4, danger below that.
    #
    # @return [String]
    def badge_class
      if boolean?
        return passed? ? "observ-badge--success" : "observ-badge--danger"
      end
      return "observ-badge--success" if value >= 0.7
      return "observ-badge--warning" if value >= 0.4

      "observ-badge--danger"
    end
  end
end