rubyllm-observ 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. checksums.yaml +7 -0
  2. data/README.md +778 -0
  3. data/Rakefile +49 -0
  4. data/app/assets/javascripts/observ/application.js +12 -0
  5. data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
  6. data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
  7. data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
  8. data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
  9. data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
  10. data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
  11. data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
  12. data/app/assets/javascripts/observ/controllers/index.js +52 -0
  13. data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
  14. data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
  15. data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
  16. data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
  17. data/app/assets/stylesheets/observ/_annotations.scss +127 -0
  18. data/app/assets/stylesheets/observ/_card.scss +52 -0
  19. data/app/assets/stylesheets/observ/_chat.scss +156 -0
  20. data/app/assets/stylesheets/observ/_components.scss +460 -0
  21. data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
  22. data/app/assets/stylesheets/observ/_datasets.scss +697 -0
  23. data/app/assets/stylesheets/observ/_drawer.scss +273 -0
  24. data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
  25. data/app/assets/stylesheets/observ/_layout.scss +256 -0
  26. data/app/assets/stylesheets/observ/_metrics.scss +99 -0
  27. data/app/assets/stylesheets/observ/_observations.scss +160 -0
  28. data/app/assets/stylesheets/observ/_pagination.scss +143 -0
  29. data/app/assets/stylesheets/observ/_prompts.scss +365 -0
  30. data/app/assets/stylesheets/observ/_table.scss +53 -0
  31. data/app/assets/stylesheets/observ/_variables.scss +53 -0
  32. data/app/assets/stylesheets/observ/application.scss +15 -0
  33. data/app/controllers/observ/annotations_controller.rb +144 -0
  34. data/app/controllers/observ/application_controller.rb +8 -0
  35. data/app/controllers/observ/chats_controller.rb +58 -0
  36. data/app/controllers/observ/dashboard_controller.rb +159 -0
  37. data/app/controllers/observ/dataset_items_controller.rb +85 -0
  38. data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
  39. data/app/controllers/observ/dataset_runs_controller.rb +110 -0
  40. data/app/controllers/observ/datasets_controller.rb +74 -0
  41. data/app/controllers/observ/messages_controller.rb +26 -0
  42. data/app/controllers/observ/observations_controller.rb +59 -0
  43. data/app/controllers/observ/prompt_versions_controller.rb +148 -0
  44. data/app/controllers/observ/prompts_controller.rb +205 -0
  45. data/app/controllers/observ/sessions_controller.rb +45 -0
  46. data/app/controllers/observ/traces_controller.rb +86 -0
  47. data/app/forms/observ/prompt_form.rb +96 -0
  48. data/app/helpers/observ/application_helper.rb +9 -0
  49. data/app/helpers/observ/chats_helper.rb +47 -0
  50. data/app/helpers/observ/dashboard_helper.rb +154 -0
  51. data/app/helpers/observ/datasets_helper.rb +62 -0
  52. data/app/helpers/observ/pagination_helper.rb +38 -0
  53. data/app/jobs/observ/application_job.rb +4 -0
  54. data/app/jobs/observ/dataset_runner_job.rb +49 -0
  55. data/app/mailers/observ/application_mailer.rb +6 -0
  56. data/app/models/concerns/observ/agent_phaseable.rb +124 -0
  57. data/app/models/concerns/observ/agent_selectable.rb +50 -0
  58. data/app/models/concerns/observ/chat_enhancements.rb +109 -0
  59. data/app/models/concerns/observ/message_enhancements.rb +31 -0
  60. data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
  61. data/app/models/concerns/observ/prompt_management.rb +320 -0
  62. data/app/models/concerns/observ/trace_association.rb +9 -0
  63. data/app/models/observ/annotation.rb +23 -0
  64. data/app/models/observ/application_record.rb +5 -0
  65. data/app/models/observ/dataset.rb +51 -0
  66. data/app/models/observ/dataset_item.rb +41 -0
  67. data/app/models/observ/dataset_run.rb +104 -0
  68. data/app/models/observ/dataset_run_item.rb +111 -0
  69. data/app/models/observ/generation.rb +56 -0
  70. data/app/models/observ/null_prompt.rb +59 -0
  71. data/app/models/observ/observation.rb +38 -0
  72. data/app/models/observ/prompt.rb +315 -0
  73. data/app/models/observ/score.rb +51 -0
  74. data/app/models/observ/session.rb +131 -0
  75. data/app/models/observ/span.rb +13 -0
  76. data/app/models/observ/trace.rb +135 -0
  77. data/app/presenters/observ/agent_select_presenter.rb +59 -0
  78. data/app/services/observ/agent_executor_service.rb +174 -0
  79. data/app/services/observ/agent_provider.rb +60 -0
  80. data/app/services/observ/agent_selection_service.rb +53 -0
  81. data/app/services/observ/chat_instrumenter.rb +523 -0
  82. data/app/services/observ/dataset_runner_service.rb +153 -0
  83. data/app/services/observ/evaluator_runner_service.rb +58 -0
  84. data/app/services/observ/evaluators/base_evaluator.rb +51 -0
  85. data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
  86. data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
  87. data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
  88. data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
  89. data/app/services/observ/prompt_manager/caching.rb +167 -0
  90. data/app/services/observ/prompt_manager/comparison.rb +49 -0
  91. data/app/services/observ/prompt_manager/version_management.rb +96 -0
  92. data/app/services/observ/prompt_manager.rb +40 -0
  93. data/app/services/observ/trace_text_formatter.rb +349 -0
  94. data/app/validators/observ/prompt_config_validator.rb +187 -0
  95. data/app/views/kaminari/_first_page.html.erb +11 -0
  96. data/app/views/kaminari/_gap.html.erb +8 -0
  97. data/app/views/kaminari/_last_page.html.erb +11 -0
  98. data/app/views/kaminari/_next_page.html.erb +11 -0
  99. data/app/views/kaminari/_page.html.erb +12 -0
  100. data/app/views/kaminari/_paginator.html.erb +25 -0
  101. data/app/views/kaminari/_prev_page.html.erb +11 -0
  102. data/app/views/kaminari/observ/_first_page.html.erb +11 -0
  103. data/app/views/kaminari/observ/_gap.html.erb +8 -0
  104. data/app/views/kaminari/observ/_last_page.html.erb +11 -0
  105. data/app/views/kaminari/observ/_next_page.html.erb +11 -0
  106. data/app/views/kaminari/observ/_page.html.erb +12 -0
  107. data/app/views/kaminari/observ/_paginator.html.erb +25 -0
  108. data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
  109. data/app/views/layouts/observ/application.html.erb +88 -0
  110. data/app/views/observ/annotations/_annotation.html.erb +13 -0
  111. data/app/views/observ/annotations/_form.html.erb +28 -0
  112. data/app/views/observ/annotations/index.html.erb +28 -0
  113. data/app/views/observ/annotations/sessions_index.html.erb +48 -0
  114. data/app/views/observ/annotations/traces_index.html.erb +48 -0
  115. data/app/views/observ/chats/_form.html.erb +45 -0
  116. data/app/views/observ/chats/index.html.erb +67 -0
  117. data/app/views/observ/chats/new.html.erb +17 -0
  118. data/app/views/observ/chats/show.html.erb +34 -0
  119. data/app/views/observ/dashboard/index.html.erb +236 -0
  120. data/app/views/observ/dataset_items/_form.html.erb +49 -0
  121. data/app/views/observ/dataset_items/edit.html.erb +18 -0
  122. data/app/views/observ/dataset_items/index.html.erb +95 -0
  123. data/app/views/observ/dataset_items/new.html.erb +18 -0
  124. data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
  125. data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
  126. data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
  127. data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
  128. data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
  129. data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
  130. data/app/views/observ/dataset_runs/index.html.erb +108 -0
  131. data/app/views/observ/dataset_runs/new.html.erb +57 -0
  132. data/app/views/observ/dataset_runs/review.html.erb +155 -0
  133. data/app/views/observ/dataset_runs/show.html.erb +166 -0
  134. data/app/views/observ/datasets/_form.html.erb +62 -0
  135. data/app/views/observ/datasets/_items_tab.html.erb +66 -0
  136. data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
  137. data/app/views/observ/datasets/edit.html.erb +32 -0
  138. data/app/views/observ/datasets/index.html.erb +105 -0
  139. data/app/views/observ/datasets/new.html.erb +18 -0
  140. data/app/views/observ/datasets/show.html.erb +67 -0
  141. data/app/views/observ/messages/_content.html.erb +1 -0
  142. data/app/views/observ/messages/_form.html.erb +33 -0
  143. data/app/views/observ/messages/_message.html.erb +14 -0
  144. data/app/views/observ/messages/_tool_calls.html.erb +10 -0
  145. data/app/views/observ/messages/create.turbo_stream.erb +9 -0
  146. data/app/views/observ/observations/index.html.erb +97 -0
  147. data/app/views/observ/observations/show_generation.html.erb +195 -0
  148. data/app/views/observ/observations/show_span.html.erb +93 -0
  149. data/app/views/observ/prompts/_diff_content.html.erb +16 -0
  150. data/app/views/observ/prompts/_form.html.erb +111 -0
  151. data/app/views/observ/prompts/_new_form.html.erb +102 -0
  152. data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
  153. data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
  154. data/app/views/observ/prompts/_version_actions.html.erb +40 -0
  155. data/app/views/observ/prompts/compare.html.erb +155 -0
  156. data/app/views/observ/prompts/edit.html.erb +17 -0
  157. data/app/views/observ/prompts/index.html.erb +108 -0
  158. data/app/views/observ/prompts/new.html.erb +17 -0
  159. data/app/views/observ/prompts/show.html.erb +138 -0
  160. data/app/views/observ/prompts/versions.html.erb +87 -0
  161. data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
  162. data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
  163. data/app/views/observ/sessions/index.html.erb +91 -0
  164. data/app/views/observ/sessions/show.html.erb +251 -0
  165. data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
  166. data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
  167. data/app/views/observ/traces/index.html.erb +87 -0
  168. data/app/views/observ/traces/show.html.erb +285 -0
  169. data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
  170. data/app/views/shared/_drawer.html.erb +26 -0
  171. data/config/routes.rb +80 -0
  172. data/db/migrate/001_create_observ_sessions.rb +21 -0
  173. data/db/migrate/002_create_observ_traces.rb +25 -0
  174. data/db/migrate/003_create_observ_observations.rb +42 -0
  175. data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
  176. data/db/migrate/005_create_observ_prompts.rb +21 -0
  177. data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
  178. data/db/migrate/007_create_observ_annotations.rb +12 -0
  179. data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
  180. data/db/migrate/010_create_observ_datasets.rb +15 -0
  181. data/db/migrate/011_create_observ_dataset_items.rb +17 -0
  182. data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
  183. data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
  184. data/db/migrate/014_create_observ_scores.rb +26 -0
  185. data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
  186. data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
  187. data/lib/generators/observ/install/USAGE +27 -0
  188. data/lib/generators/observ/install/install_generator.rb +270 -0
  189. data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
  190. data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
  191. data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
  192. data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
  193. data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
  194. data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
  195. data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
  196. data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
  197. data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
  198. data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
  199. data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
  200. data/lib/observ/asset_installer.rb +130 -0
  201. data/lib/observ/asset_syncer.rb +104 -0
  202. data/lib/observ/configuration.rb +108 -0
  203. data/lib/observ/engine.rb +50 -0
  204. data/lib/observ/index_file_generator.rb +142 -0
  205. data/lib/observ/instrumenter/ruby_llm.rb +6 -0
  206. data/lib/observ/version.rb +3 -0
  207. data/lib/observ.rb +29 -0
  208. data/lib/tasks/observ_tasks.rake +75 -0
  209. metadata +453 -0
@@ -0,0 +1,159 @@
1
module Observ
  # Renders the observability dashboard and exposes its aggregates as JSON.
  #
  # Aggregates are scoped to a reporting window chosen by the `period` request
  # parameter ("24h", "7d" or "30d"). Any other value, including a missing
  # one, selects the catch-all window defined in #time_range_from_period.
  class DashboardController < ApplicationController
    # Window used by the HTML dashboard when no `period` parameter is sent.
    DEFAULT_PERIOD = "24h"

    # Loads everything the dashboard page displays: headline metrics, the ten
    # most recently started sessions, cost per model, tokens per day and a
    # per-agent breakdown.
    def index
      @time_period = params[:period] || DEFAULT_PERIOD
      @metrics = calculate_dashboard_metrics
      @recent_sessions = Observ::Session.order(start_time: :desc).limit(10)
      @cost_by_model = calculate_cost_by_model
      @token_usage_over_time = calculate_token_usage_over_time
      @metrics_by_agent = calculate_metrics_by_agent
    end

    # Returns the headline metrics (including trends) as JSON.
    def metrics
      # The helpers read @time_period; leaving it unset would make them ignore
      # a requested ?period= and always use the catch-all window. With no
      # parameter the period stays nil, which still selects that window.
      @time_period = params[:period]
      render json: calculate_dashboard_metrics
    end

    # Returns cost grouped by model and cost grouped by day as JSON.
    def cost_analysis
      # See #metrics: the requested window has to be stored before the helpers run.
      @time_period = params[:period]
      render json: {
        by_model: calculate_cost_by_model,
        over_time: calculate_cost_over_time
      }
    end

    private

    # Builds the headline metrics hash for the current window and merges in
    # percentage trends against the immediately preceding window of equal length.
    #
    # @return [Hash] metric name => value, plus a :trends sub-hash
    def calculate_dashboard_metrics
      time_range = time_range_from_period(@time_period)
      # The comparison window is as long as the current one and ends where it starts.
      previous_time_range = time_range - (Time.current - time_range)

      traces = traces_for_sessions_started(from: time_range)
      previous_traces = traces_for_sessions_started(from: previous_time_range, before: time_range)

      # Computed once and reused for the average cost per call below.
      total_llm_calls = generations_for_sessions_started(from: time_range).count
      total_cost = traces.sum(:total_cost).to_f

      current_metrics = {
        total_sessions: Observ::Session.where("start_time >= ?", time_range).count,
        total_traces: traces.count,
        total_llm_calls: total_llm_calls,
        total_tokens: traces.sum(:total_tokens),
        total_cost: total_cost,
        avg_latency_ms: calculate_average_llm_latency(time_range),
        success_rate: calculate_success_rate(time_range),
        avg_cost_per_call: calculate_avg_cost_per_call(total_cost, total_llm_calls)
      }

      previous_metrics = {
        total_sessions: Observ::Session.where("start_time >= ? AND start_time < ?", previous_time_range, time_range).count,
        total_tokens: previous_traces.sum(:total_tokens),
        total_cost: previous_traces.sum(:total_cost).to_f
      }

      current_metrics.merge(
        trends: calculate_trends(current_metrics, previous_metrics)
      )
    end

    # Traces whose session started at or after `from` and, when given,
    # strictly before `before`.
    def traces_for_sessions_started(from:, before: nil)
      scope = Observ::Trace.joins(:observ_session).where("observ_sessions.start_time >= ?", from)
      before ? scope.where("observ_sessions.start_time < ?", before) : scope
    end

    # Generations whose session started at or after `from`.
    def generations_for_sessions_started(from:)
      Observ::Generation.joins(trace: :observ_session).where("observ_sessions.start_time >= ?", from)
    end

    # Sum of generation cost per model for generations created in the current window.
    #
    # @return [Hash] model => Float cost
    def calculate_cost_by_model
      Observ::Generation
        .where("created_at >= ?", time_range_from_period(@time_period))
        .group(:model)
        .sum(:cost_usd)
        .transform_values(&:to_f)
    end

    # Session token totals grouped by the calendar date of the session start.
    def calculate_token_usage_over_time
      Observ::Session
        .where("start_time >= ?", time_range_from_period(@time_period))
        .group("DATE(start_time)")
        .sum(:total_tokens)
    end

    # Session cost totals grouped by the calendar date of the session start.
    #
    # @return [Hash] date => Float cost
    def calculate_cost_over_time
      Observ::Session
        .where("start_time >= ?", time_range_from_period(@time_period))
        .group("DATE(start_time)")
        .sum(:total_cost)
        .transform_values(&:to_f)
    end

    # Mean duration, in whole milliseconds, of generations that have an
    # end_time and belong to sessions started at or after `time_range`.
    #
    # @return [Numeric] 0 when there are no such generations
    def calculate_average_llm_latency(time_range)
      # Load the rows once: the emptiness check, the sum and the divisor all
      # use the same array instead of issuing separate queries.
      finished = generations_for_sessions_started(from: time_range)
        .where.not(end_time: nil)
        .to_a

      return 0 if finished.empty?

      total_duration = finished.sum { |g| ((g.end_time - g.start_time) * 1000).round(2) }
      (total_duration / finished.size).round(0)
    end

    # Percentage of generations in the window whose status_message is NULL.
    #
    # @return [Float] 100.0 when there are no generations
    def calculate_success_rate(time_range)
      generations = generations_for_sessions_started(from: time_range)
      total = generations.count

      return 100.0 if total.zero?

      # A generation with any status_message is counted as failed.
      failed = generations.where.not(status_message: nil).count

      (((total - failed).to_f / total) * 100).round(1)
    end

    # Average trace cost per generation, rounded to six decimals.
    #
    # @param total_cost [Float] summed trace cost for the window
    # @param total_calls [Integer] number of generations in the window
    # @return [Float] 0.0 when there were no calls
    def calculate_avg_cost_per_call(total_cost, total_calls)
      return 0.0 if total_calls.zero?

      (total_cost / total_calls).round(6)
    end

    # Percentage change of sessions, tokens and cost between two metric hashes.
    def calculate_trends(current, previous)
      {
        sessions: calculate_percentage_change(current[:total_sessions], previous[:total_sessions]),
        tokens: calculate_percentage_change(current[:total_tokens], previous[:total_tokens]),
        cost: calculate_percentage_change(current[:total_cost], previous[:total_cost])
      }
    end

    # Relative change from `previous` to `current` in percent, one decimal.
    # Returns 0 when `previous` is zero, where a ratio is undefined.
    def calculate_percentage_change(current, previous)
      return 0 if previous.zero?
      (((current - previous).to_f / previous) * 100).round(1)
    end

    # Per-agent totals for sessions in the current window, most expensive
    # first. Sessions are grouped in Ruby by metadata["agent_type"]; sessions
    # without one are reported under "Unknown".
    #
    # @return [Array<Hash>]
    def calculate_metrics_by_agent
      time_range = time_range_from_period(@time_period)
      sessions = Observ::Session.where("start_time >= ?", time_range)

      sessions.group_by { |s| s.metadata&.dig("agent_type") || "Unknown" }.map do |agent_type, agent_sessions|
        session_ids = agent_sessions.map(&:id)
        traces = Observ::Trace.where(observ_session_id: session_ids)
        generations = Observ::Generation.joins(:trace).where(observ_traces: { observ_session_id: session_ids })

        {
          agent_type: agent_type,
          sessions: agent_sessions.count,
          traces: traces.count,
          llm_calls: generations.count,
          tokens: traces.sum(:total_tokens),
          cost: traces.sum(:total_cost).to_f
        }
      end.sort_by { |m| -m[:cost] }
    end

    # Maps a period label to the start of its window.
    # Unrecognised labels (including nil) fall back to 100 years ago.
    def time_range_from_period(period)
      case period
      when "24h" then 24.hours.ago
      when "7d" then 7.days.ago
      when "30d" then 30.days.ago
      else 100.years.ago
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Manages the items nested under a single dataset: listing, creating,
  # editing and removing them.
  class DatasetItemsController < ApplicationController
    before_action :set_dataset
    before_action :set_item, only: %i[edit update destroy]

    # Lists the dataset's items, newest first, optionally narrowed by status.
    def index
      scope = @dataset.items.order(created_at: :desc)
      scope = scope.where(status: params[:status]) if params[:status].present?

      @items = scope.page(params[:page]).per(Observ.config.pagination_per_page)
    end

    # Prepares an unsaved item for the form.
    def new
      @item = @dataset.items.build
    end

    # Creates an item; re-renders the form with a 422 status when saving fails.
    def create
      @item = @dataset.items.build(item_params)
      return render(:new, status: :unprocessable_entity) unless @item.save

      redirect_to dataset_path(@dataset, tab: "items"),
                  notice: "Item added to dataset successfully."
    end

    # The item is loaded by the set_item callback.
    def edit
    end

    # Updates an item; re-renders the form with a 422 status when saving fails.
    def update
      return render(:edit, status: :unprocessable_entity) unless @item.update(item_params)

      redirect_to dataset_path(@dataset, tab: "items"),
                  notice: "Item updated successfully."
    end

    # Deletes the item and returns to the dataset's items tab.
    def destroy
      @item.destroy
      redirect_to dataset_path(@dataset, tab: "items"),
                  notice: "Item removed from dataset."
    end

    private

    def set_dataset
      @dataset = Observ::Dataset.find(params[:dataset_id])
    end

    # Looked up through the dataset so ids from other datasets are not found.
    def set_item
      @item = @dataset.items.find(params[:id])
    end

    # Builds the attributes for mass assignment. The form submits `input_text`
    # and `expected_output_text`; when present, each is parsed and stored under
    # `input` / `expected_output`. The raw `expected_output_text` is dropped.
    def item_params
      form = params.require(:observ_dataset_item)
      attributes = form.permit(:status, :expected_output_text)

      { input: :input_text, expected_output: :expected_output_text }.each do |attribute, field|
        text = form[field]
        attributes[attribute] = parse_json_field(text) if text.present?
      end

      attributes.except(:expected_output_text)
    end

    # Parses `text` as JSON. Blank text is returned unchanged, and text that
    # is not valid JSON is kept as a plain string.
    def parse_json_field(text)
      return text if text.blank?

      begin
        JSON.parse(text)
      rescue JSON::ParserError
        text
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Drawer views and manual scoring for one item of a dataset run.
  class DatasetRunItemsController < ApplicationController
    before_action :set_dataset
    before_action :set_run
    before_action :set_run_item

    # Renders via its template, which reads @run_item.
    def details_drawer
    end

    # Renders the score drawer via its template.
    def score_drawer
    end

    # Creates or updates the run item's "manual" score. A submitted value of 1
    # is stored as 1.0; anything else is stored as 0.0.
    def score
      value = params[:value].to_i == 1 ? 1.0 : 0.0

      manual_score = @run_item.scores.find_or_initialize_by(name: "manual", source: :manual)
      manual_score.assign_attributes(
        trace: @run_item.trace,
        value: value,
        data_type: :boolean,
        comment: params[:comment],
        created_by: params[:created_by]
      )

      if manual_score.save
        respond_to_saved_score
      else
        respond_to_rejected_score(manual_score)
      end
    end

    private

    # Turbo: refresh the scores cell and replace the drawer content with the
    # close partial. HTML: return to review mode or to the run page.
    def respond_to_saved_score
      respond_to do |format|
        format.turbo_stream do
          render turbo_stream: [
            turbo_stream.replace("run-item-#{@run_item.id}-scores",
                                 partial: "observ/dataset_run_items/scores_cell",
                                 locals: { run_item: @run_item }),
            turbo_stream.update("drawer-content",
                                partial: "observ/dataset_run_items/score_close_drawer")
          ]
        end
        format.html do
          if params[:review_mode].present?
            redirect_to review_dataset_run_path(@dataset, @run), notice: "Score saved!"
          else
            redirect_to dataset_run_path(@dataset, @run)
          end
        end
      end
    end

    # Turbo: re-render the score drawer with the validation errors.
    # HTML: redirect with an alert (detailed only in review mode).
    def respond_to_rejected_score(rejected)
      respond_to do |format|
        format.turbo_stream do
          render turbo_stream: turbo_stream.replace(
            "drawer-content",
            partial: "observ/dataset_run_items/score_drawer",
            locals: { run_item: @run_item, error: rejected.errors.full_messages.join(", ") }
          )
        end
        format.html do
          if params[:review_mode].present?
            redirect_to review_dataset_run_path(@dataset, @run),
                        alert: "Failed to save score: #{rejected.errors.full_messages.join(', ')}"
          else
            redirect_to dataset_run_path(@dataset, @run), alert: "Failed to save score."
          end
        end
      end
    end

    def set_dataset
      @dataset = Observ::Dataset.find(params[:dataset_id])
    end

    # Looked up through the dataset so runs of other datasets are not found.
    def set_run
      @run = @dataset.runs.find(params[:run_id])
    end

    # Looked up through the run so items of other runs are not found.
    def set_run_item
      @run_item = @run.run_items.find(params[:id])
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # Lists, creates, deletes, evaluates and reviews the runs of a dataset.
  class DatasetRunsController < ApplicationController
    before_action :set_dataset
    before_action :set_run, only: %i[show destroy run_evaluators review]

    # Lists the dataset's runs, newest first, optionally narrowed by status.
    def index
      scope = @dataset.runs.order(created_at: :desc)
      scope = scope.where(status: params[:status]) if params[:status].present?

      @runs = scope.page(params[:page]).per(Observ.config.pagination_per_page)
    end

    # Shows one run with its items in creation order, paginated.
    def show
      ordered_items = @run.run_items.includes(:dataset_item, :trace).order(created_at: :asc)
      @run_items = ordered_items.page(params[:page]).per(Observ.config.pagination_per_page)
    end

    # Prepares an unsaved run for the form.
    def new
      @run = @dataset.runs.build
    end

    # Saves the run, initializes its run items and enqueues
    # Observ::DatasetRunnerJob with the run id. Re-renders the form with a 422
    # status when saving fails.
    def create
      @run = @dataset.runs.build(run_params)
      return render(:new, status: :unprocessable_entity) unless @run.save

      @run.initialize_run_items!
      Observ::DatasetRunnerJob.perform_later(@run.id)

      redirect_to dataset_run_path(@dataset, @run),
                  notice: "Run '#{@run.name}' created with #{@run.total_items} items. Execution will begin shortly."
    end

    # Deletes the run and returns to the dataset's runs tab.
    def destroy
      # Read the name before destroying; it is used in the flash message.
      name = @run.name
      @run.destroy
      redirect_to dataset_path(@dataset, tab: "runs"),
                  notice: "Run '#{name}' deleted successfully."
    end

    # Runs the evaluators listed in the dataset metadata under "evaluators",
    # falling back to a single exact_match evaluator when none are configured.
    def run_evaluators
      configured = @dataset.metadata&.dig("evaluators")
      evaluator_configs = configured || [ { "type" => "exact_match" } ]
      Observ::EvaluatorRunnerService.new(@run, evaluator_configs: evaluator_configs).call

      redirect_to dataset_run_path(@dataset, @run),
                  notice: "Evaluators completed. #{@run.items_with_scores_count} items scored."
    end

    # Presents the next succeeded item that has no manual score, or redirects
    # to the run page once every item has one.
    def review
      @run_item = next_item_to_review(@run)

      unless @run_item
        redirect_to dataset_run_path(@dataset, @run),
                    notice: "All items have been reviewed!"
        return
      end

      @progress = review_progress(@run)
      @existing_manual = @run_item.score_for("manual", source: :manual)
    end

    private

    # Returns the first succeeded run item (by id) lacking a manual score.
    # With `after_item`, only later items are searched first; if none of them
    # qualifies, the search wraps around to the full list.
    def next_item_to_review(run, after_item: nil)
      candidates = run.run_items.succeeded.includes(:dataset_item, :scores).order(:id)
      candidates = candidates.where("id > ?", after_item.id) if after_item

      pending = first_without_manual_score(candidates)
      return pending if pending
      return nil unless after_item

      first_without_manual_score(run.run_items.succeeded.includes(:dataset_item, :scores).order(:id))
    end

    # First element of `items` for which no manual score exists, or nil.
    def first_without_manual_score(items)
      items.find { |item| item.score_for("manual", source: :manual).nil? }
    end

    # Counts succeeded items and how many of them already carry a manual score.
    #
    # @return [Hash] { scored:, total: }
    def review_progress(run)
      succeeded_items = run.run_items.succeeded
      with_manual_score = succeeded_items
        .joins(:scores)
        .where(observ_scores: { name: "manual", source: :manual })
        .distinct

      { scored: with_manual_score.count, total: succeeded_items.count }
    end

    def set_dataset
      @dataset = Observ::Dataset.find(params[:dataset_id])
    end

    # Looked up through the dataset so runs of other datasets are not found.
    def set_run
      @run = @dataset.runs.find(params[:id])
    end

    # The form submits under :dataset_run (model name without module prefix).
    def run_params
      params.require(:dataset_run).permit(:name, :description)
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
module Observ
  # CRUD for datasets, plus a tabbed show page listing a dataset's items and runs.
  class DatasetsController < ApplicationController
    before_action :set_dataset, only: [ :show, :edit, :update, :destroy ]

    # Lists datasets, most recently updated first, optionally filtered by a
    # substring of the name given in params[:search].
    def index
      @datasets = Observ::Dataset.order(updated_at: :desc)

      if params[:search].present?
        # Escape LIKE metacharacters so a "%" or "_" typed by the user matches
        # literally instead of acting as a wildcard. "!" is used as the escape
        # character and declared explicitly, because the default escape
        # character differs between database engines.
        term = Observ::Dataset.sanitize_sql_like(params[:search].to_s, "!")
        @datasets = @datasets.where("name LIKE ? ESCAPE '!'", "%#{term}%")
      end

      @datasets = @datasets.page(params[:page]).per(Observ.config.pagination_per_page)
    end

    # Shows a dataset with independently paginated items and runs (10 per
    # page each). params[:tab] selects the active tab, defaulting to "items".
    def show
      @items = @dataset.items.order(created_at: :desc).page(params[:items_page]).per(10)
      @runs = @dataset.runs.order(created_at: :desc).page(params[:runs_page]).per(10)
      @active_tab = params[:tab] || "items"
    end

    # Prepares an unsaved dataset and the agent choices for the form.
    def new
      @dataset = Observ::Dataset.new
      @agents = available_agents
    end

    # Creates a dataset; re-renders the form with a 422 status when saving fails.
    def create
      @dataset = Observ::Dataset.new(dataset_params)

      if @dataset.save
        redirect_to dataset_path(@dataset), notice: "Dataset '#{@dataset.name}' created successfully."
      else
        # The re-rendered form needs the agent choices again.
        @agents = available_agents
        render :new, status: :unprocessable_entity
      end
    end

    # Loads the agent choices for the edit form.
    def edit
      @agents = available_agents
    end

    # Updates a dataset; re-renders the form with a 422 status when saving fails.
    def update
      if @dataset.update(dataset_params)
        redirect_to dataset_path(@dataset), notice: "Dataset '#{@dataset.name}' updated successfully."
      else
        @agents = available_agents
        render :edit, status: :unprocessable_entity
      end
    end

    # Deletes the dataset and returns to the index.
    def destroy
      # Read the name before destroying; it is used in the flash message.
      name = @dataset.name
      @dataset.destroy
      redirect_to datasets_path, notice: "Dataset '#{name}' deleted successfully."
    end

    private

    def set_dataset
      @dataset = Observ::Dataset.find(params[:id])
    end

    def dataset_params
      params.require(:observ_dataset).permit(:name, :description, :agent_class)
    end

    # [display name, class name] pairs for every agent reported by
    # Observ::AgentProvider.
    def available_agents
      Observ::AgentProvider.all_agents.map do |agent|
        [ agent.display_name, agent.name ]
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module Observ
  # Accepts a new message for a chat and enqueues ChatResponseJob with it.
  class MessagesController < ApplicationController
    before_action :set_chat

    # Enqueues ChatResponseJob with the chat id and the submitted content,
    # then responds with the turbo stream template or a redirect to the chat.
    def create
      # Blank submissions skip the job but still get an explicit response;
      # returning early here would leave the request to implicit rendering,
      # and no HTML template is provided for this action.
      ChatResponseJob.perform_later(@chat.id, content) if content.present?

      respond_to do |format|
        format.turbo_stream
        format.html { redirect_to chat_path(@chat) }
      end
    end

    private

    def set_chat
      @chat = Chat.find(params[:chat_id])
    end

    # Submitted message text, or nil when the request carries no
    # message[content] (dig avoids a NoMethodError on a missing :message key).
    def content
      params.dig(:message, :content)
    end
  end
end
@@ -0,0 +1,59 @@
1
module Observ
  # Browses observations: a filterable index, type-specific listings and a
  # detail page whose template depends on the observation type.
  class ObservationsController < ApplicationController
    # Lists all observations, newest first; applies params[:filter] when given.
    def index
      @observations = recent_page_of(Observ::Observation)
      apply_filters if params[:filter].present?
    end

    # Renders the generation template for generations and the span template
    # for everything else.
    def show
      @observation = Observ::Observation.includes(:trace).find(params[:id])

      template = @observation.is_a?(Observ::Generation) ? :show_generation : :show_span
      render template
    end

    # Lists generations only, reusing the index template.
    def generations
      @observations = recent_page_of(Observ::Generation)
      render :index
    end

    # Lists spans only, reusing the index template.
    def spans
      @observations = recent_page_of(Observ::Span)
      render :index
    end

    private

    # One page of `model` records with their trace preloaded, ordered by
    # start_time descending.
    def recent_page_of(model)
      model
        .includes(:trace)
        .order(start_time: :desc)
        .page(params[:page])
        .per(Observ.config.pagination_per_page)
    end

    # Narrows @observations by the present keys of params[:filter]:
    # exact match on type, name and model; inclusive bounds on start_time
    # from start_date and end_date.
    def apply_filters
      filter = params[:filter]

      %i[type name model].each do |column|
        value = filter[column]
        @observations = @observations.where(column => value) if value.present?
      end

      from = filter[:start_date]
      @observations = @observations.where("start_time >= ?", from) if from.present?

      to = filter[:end_date]
      @observations = @observations.where("start_time <= ?", to) if to.present?
    end
  end
end
+ end