rubyllm-observ 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. checksums.yaml +7 -0
  2. data/README.md +778 -0
  3. data/Rakefile +49 -0
  4. data/app/assets/javascripts/observ/application.js +12 -0
  5. data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
  6. data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
  7. data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
  8. data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
  9. data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
  10. data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
  11. data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
  12. data/app/assets/javascripts/observ/controllers/index.js +52 -0
  13. data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
  14. data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
  15. data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
  16. data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
  17. data/app/assets/stylesheets/observ/_annotations.scss +127 -0
  18. data/app/assets/stylesheets/observ/_card.scss +52 -0
  19. data/app/assets/stylesheets/observ/_chat.scss +156 -0
  20. data/app/assets/stylesheets/observ/_components.scss +460 -0
  21. data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
  22. data/app/assets/stylesheets/observ/_datasets.scss +697 -0
  23. data/app/assets/stylesheets/observ/_drawer.scss +273 -0
  24. data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
  25. data/app/assets/stylesheets/observ/_layout.scss +256 -0
  26. data/app/assets/stylesheets/observ/_metrics.scss +99 -0
  27. data/app/assets/stylesheets/observ/_observations.scss +160 -0
  28. data/app/assets/stylesheets/observ/_pagination.scss +143 -0
  29. data/app/assets/stylesheets/observ/_prompts.scss +365 -0
  30. data/app/assets/stylesheets/observ/_table.scss +53 -0
  31. data/app/assets/stylesheets/observ/_variables.scss +53 -0
  32. data/app/assets/stylesheets/observ/application.scss +15 -0
  33. data/app/controllers/observ/annotations_controller.rb +144 -0
  34. data/app/controllers/observ/application_controller.rb +8 -0
  35. data/app/controllers/observ/chats_controller.rb +58 -0
  36. data/app/controllers/observ/dashboard_controller.rb +159 -0
  37. data/app/controllers/observ/dataset_items_controller.rb +85 -0
  38. data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
  39. data/app/controllers/observ/dataset_runs_controller.rb +110 -0
  40. data/app/controllers/observ/datasets_controller.rb +74 -0
  41. data/app/controllers/observ/messages_controller.rb +26 -0
  42. data/app/controllers/observ/observations_controller.rb +59 -0
  43. data/app/controllers/observ/prompt_versions_controller.rb +148 -0
  44. data/app/controllers/observ/prompts_controller.rb +205 -0
  45. data/app/controllers/observ/sessions_controller.rb +45 -0
  46. data/app/controllers/observ/traces_controller.rb +86 -0
  47. data/app/forms/observ/prompt_form.rb +96 -0
  48. data/app/helpers/observ/application_helper.rb +9 -0
  49. data/app/helpers/observ/chats_helper.rb +47 -0
  50. data/app/helpers/observ/dashboard_helper.rb +154 -0
  51. data/app/helpers/observ/datasets_helper.rb +62 -0
  52. data/app/helpers/observ/pagination_helper.rb +38 -0
  53. data/app/jobs/observ/application_job.rb +4 -0
  54. data/app/jobs/observ/dataset_runner_job.rb +49 -0
  55. data/app/mailers/observ/application_mailer.rb +6 -0
  56. data/app/models/concerns/observ/agent_phaseable.rb +124 -0
  57. data/app/models/concerns/observ/agent_selectable.rb +50 -0
  58. data/app/models/concerns/observ/chat_enhancements.rb +109 -0
  59. data/app/models/concerns/observ/message_enhancements.rb +31 -0
  60. data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
  61. data/app/models/concerns/observ/prompt_management.rb +320 -0
  62. data/app/models/concerns/observ/trace_association.rb +9 -0
  63. data/app/models/observ/annotation.rb +23 -0
  64. data/app/models/observ/application_record.rb +5 -0
  65. data/app/models/observ/dataset.rb +51 -0
  66. data/app/models/observ/dataset_item.rb +41 -0
  67. data/app/models/observ/dataset_run.rb +104 -0
  68. data/app/models/observ/dataset_run_item.rb +111 -0
  69. data/app/models/observ/generation.rb +56 -0
  70. data/app/models/observ/null_prompt.rb +59 -0
  71. data/app/models/observ/observation.rb +38 -0
  72. data/app/models/observ/prompt.rb +315 -0
  73. data/app/models/observ/score.rb +51 -0
  74. data/app/models/observ/session.rb +131 -0
  75. data/app/models/observ/span.rb +13 -0
  76. data/app/models/observ/trace.rb +135 -0
  77. data/app/presenters/observ/agent_select_presenter.rb +59 -0
  78. data/app/services/observ/agent_executor_service.rb +174 -0
  79. data/app/services/observ/agent_provider.rb +60 -0
  80. data/app/services/observ/agent_selection_service.rb +53 -0
  81. data/app/services/observ/chat_instrumenter.rb +523 -0
  82. data/app/services/observ/dataset_runner_service.rb +153 -0
  83. data/app/services/observ/evaluator_runner_service.rb +58 -0
  84. data/app/services/observ/evaluators/base_evaluator.rb +51 -0
  85. data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
  86. data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
  87. data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
  88. data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
  89. data/app/services/observ/prompt_manager/caching.rb +167 -0
  90. data/app/services/observ/prompt_manager/comparison.rb +49 -0
  91. data/app/services/observ/prompt_manager/version_management.rb +96 -0
  92. data/app/services/observ/prompt_manager.rb +40 -0
  93. data/app/services/observ/trace_text_formatter.rb +349 -0
  94. data/app/validators/observ/prompt_config_validator.rb +187 -0
  95. data/app/views/kaminari/_first_page.html.erb +11 -0
  96. data/app/views/kaminari/_gap.html.erb +8 -0
  97. data/app/views/kaminari/_last_page.html.erb +11 -0
  98. data/app/views/kaminari/_next_page.html.erb +11 -0
  99. data/app/views/kaminari/_page.html.erb +12 -0
  100. data/app/views/kaminari/_paginator.html.erb +25 -0
  101. data/app/views/kaminari/_prev_page.html.erb +11 -0
  102. data/app/views/kaminari/observ/_first_page.html.erb +11 -0
  103. data/app/views/kaminari/observ/_gap.html.erb +8 -0
  104. data/app/views/kaminari/observ/_last_page.html.erb +11 -0
  105. data/app/views/kaminari/observ/_next_page.html.erb +11 -0
  106. data/app/views/kaminari/observ/_page.html.erb +12 -0
  107. data/app/views/kaminari/observ/_paginator.html.erb +25 -0
  108. data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
  109. data/app/views/layouts/observ/application.html.erb +88 -0
  110. data/app/views/observ/annotations/_annotation.html.erb +13 -0
  111. data/app/views/observ/annotations/_form.html.erb +28 -0
  112. data/app/views/observ/annotations/index.html.erb +28 -0
  113. data/app/views/observ/annotations/sessions_index.html.erb +48 -0
  114. data/app/views/observ/annotations/traces_index.html.erb +48 -0
  115. data/app/views/observ/chats/_form.html.erb +45 -0
  116. data/app/views/observ/chats/index.html.erb +67 -0
  117. data/app/views/observ/chats/new.html.erb +17 -0
  118. data/app/views/observ/chats/show.html.erb +34 -0
  119. data/app/views/observ/dashboard/index.html.erb +236 -0
  120. data/app/views/observ/dataset_items/_form.html.erb +49 -0
  121. data/app/views/observ/dataset_items/edit.html.erb +18 -0
  122. data/app/views/observ/dataset_items/index.html.erb +95 -0
  123. data/app/views/observ/dataset_items/new.html.erb +18 -0
  124. data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
  125. data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
  126. data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
  127. data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
  128. data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
  129. data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
  130. data/app/views/observ/dataset_runs/index.html.erb +108 -0
  131. data/app/views/observ/dataset_runs/new.html.erb +57 -0
  132. data/app/views/observ/dataset_runs/review.html.erb +155 -0
  133. data/app/views/observ/dataset_runs/show.html.erb +166 -0
  134. data/app/views/observ/datasets/_form.html.erb +62 -0
  135. data/app/views/observ/datasets/_items_tab.html.erb +66 -0
  136. data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
  137. data/app/views/observ/datasets/edit.html.erb +32 -0
  138. data/app/views/observ/datasets/index.html.erb +105 -0
  139. data/app/views/observ/datasets/new.html.erb +18 -0
  140. data/app/views/observ/datasets/show.html.erb +67 -0
  141. data/app/views/observ/messages/_content.html.erb +1 -0
  142. data/app/views/observ/messages/_form.html.erb +33 -0
  143. data/app/views/observ/messages/_message.html.erb +14 -0
  144. data/app/views/observ/messages/_tool_calls.html.erb +10 -0
  145. data/app/views/observ/messages/create.turbo_stream.erb +9 -0
  146. data/app/views/observ/observations/index.html.erb +97 -0
  147. data/app/views/observ/observations/show_generation.html.erb +195 -0
  148. data/app/views/observ/observations/show_span.html.erb +93 -0
  149. data/app/views/observ/prompts/_diff_content.html.erb +16 -0
  150. data/app/views/observ/prompts/_form.html.erb +111 -0
  151. data/app/views/observ/prompts/_new_form.html.erb +102 -0
  152. data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
  153. data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
  154. data/app/views/observ/prompts/_version_actions.html.erb +40 -0
  155. data/app/views/observ/prompts/compare.html.erb +155 -0
  156. data/app/views/observ/prompts/edit.html.erb +17 -0
  157. data/app/views/observ/prompts/index.html.erb +108 -0
  158. data/app/views/observ/prompts/new.html.erb +17 -0
  159. data/app/views/observ/prompts/show.html.erb +138 -0
  160. data/app/views/observ/prompts/versions.html.erb +87 -0
  161. data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
  162. data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
  163. data/app/views/observ/sessions/index.html.erb +91 -0
  164. data/app/views/observ/sessions/show.html.erb +251 -0
  165. data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
  166. data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
  167. data/app/views/observ/traces/index.html.erb +87 -0
  168. data/app/views/observ/traces/show.html.erb +285 -0
  169. data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
  170. data/app/views/shared/_drawer.html.erb +26 -0
  171. data/config/routes.rb +80 -0
  172. data/db/migrate/001_create_observ_sessions.rb +21 -0
  173. data/db/migrate/002_create_observ_traces.rb +25 -0
  174. data/db/migrate/003_create_observ_observations.rb +42 -0
  175. data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
  176. data/db/migrate/005_create_observ_prompts.rb +21 -0
  177. data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
  178. data/db/migrate/007_create_observ_annotations.rb +12 -0
  179. data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
  180. data/db/migrate/010_create_observ_datasets.rb +15 -0
  181. data/db/migrate/011_create_observ_dataset_items.rb +17 -0
  182. data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
  183. data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
  184. data/db/migrate/014_create_observ_scores.rb +26 -0
  185. data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
  186. data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
  187. data/lib/generators/observ/install/USAGE +27 -0
  188. data/lib/generators/observ/install/install_generator.rb +270 -0
  189. data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
  190. data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
  191. data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
  192. data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
  193. data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
  194. data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
  195. data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
  196. data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
  197. data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
  198. data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
  199. data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
  200. data/lib/observ/asset_installer.rb +130 -0
  201. data/lib/observ/asset_syncer.rb +104 -0
  202. data/lib/observ/configuration.rb +108 -0
  203. data/lib/observ/engine.rb +50 -0
  204. data/lib/observ/index_file_generator.rb +142 -0
  205. data/lib/observ/instrumenter/ruby_llm.rb +6 -0
  206. data/lib/observ/version.rb +3 -0
  207. data/lib/observ.rb +29 -0
  208. data/lib/tasks/observ_tasks.rake +75 -0
  209. metadata +453 -0
@@ -0,0 +1,236 @@
1
+ <% content_for :title, "Dashboard" %>
2
+
3
+ <% content_for :page_header do %>
4
+ <div class="observ-page-header__content">
5
+ <h1 class="observ-page-header__title">Observability Dashboard</h1>
6
+ <div class="observ-page-header__actions">
7
+ <%= form_with url: dashboard_path, method: :get, class: "observ-period-selector" do |f| %>
8
+ <%= f.select :period,
9
+ options_for_select([["Last 24 Hours", "24h"], ["Last 7 Days", "7d"], ["Last 30 Days", "30d"], ["All Time", "all"]], @time_period),
10
+ {},
11
+ { class: "observ-period-selector__select", onchange: "this.form.requestSubmit()" } %>
12
+ <% end %>
13
+ </div>
14
+ </div>
15
+ <% end %>
16
+
17
+ <div class="observ-dashboard">
18
+ <section class="observ-metrics-grid">
19
+ <div class="observ-metric-card">
20
+ <div class="observ-metric-card__header">
21
+ <h3 class="observ-metric-card__label">Total Sessions</h3>
22
+ <%= observ_trend_badge(@metrics[:trends][:sessions]) %>
23
+ </div>
24
+ <div class="observ-metric-card__body">
25
+ <p class="observ-metric-card__value"><%= format_number(@metrics[:total_sessions]) %></p>
26
+ </div>
27
+ </div>
28
+
29
+ <div class="observ-metric-card">
30
+ <div class="observ-metric-card__header">
31
+ <h3 class="observ-metric-card__label">Total Traces</h3>
32
+ </div>
33
+ <div class="observ-metric-card__body">
34
+ <p class="observ-metric-card__value"><%= format_number(@metrics[:total_traces]) %></p>
35
+ </div>
36
+ </div>
37
+
38
+ <div class="observ-metric-card">
39
+ <div class="observ-metric-card__header">
40
+ <h3 class="observ-metric-card__label">LLM Calls</h3>
41
+ </div>
42
+ <div class="observ-metric-card__body">
43
+ <p class="observ-metric-card__value"><%= format_number(@metrics[:total_llm_calls]) %></p>
44
+ </div>
45
+ </div>
46
+
47
+ <div class="observ-metric-card observ-metric-card--highlighted">
48
+ <div class="observ-metric-card__header">
49
+ <h3 class="observ-metric-card__label">Total Cost</h3>
50
+ <%= observ_trend_badge(@metrics[:trends][:cost]) %>
51
+ </div>
52
+ <div class="observ-metric-card__body">
53
+ <p class="observ-metric-card__value"><%= format_currency(@metrics[:total_cost]) %></p>
54
+ </div>
55
+ </div>
56
+
57
+ <div class="observ-metric-card">
58
+ <div class="observ-metric-card__header">
59
+ <h3 class="observ-metric-card__label">Total Tokens</h3>
60
+ <%= observ_trend_badge(@metrics[:trends][:tokens]) %>
61
+ </div>
62
+ <div class="observ-metric-card__body">
63
+ <p class="observ-metric-card__value"><%= format_tokens(@metrics[:total_tokens]) %></p>
64
+ </div>
65
+ </div>
66
+
67
+ <div class="observ-metric-card">
68
+ <div class="observ-metric-card__header">
69
+ <h3 class="observ-metric-card__label">Avg Latency</h3>
70
+ </div>
71
+ <div class="observ-metric-card__body">
72
+ <p class="observ-metric-card__value"><%= format_duration_ms(@metrics[:avg_latency_ms]) %></p>
73
+ </div>
74
+ </div>
75
+
76
+ <div class="observ-metric-card">
77
+ <div class="observ-metric-card__header">
78
+ <h3 class="observ-metric-card__label">Success Rate</h3>
79
+ </div>
80
+ <div class="observ-metric-card__body">
81
+ <p class="observ-metric-card__value"><%= observ_percentage(@metrics[:success_rate]) %></p>
82
+ </div>
83
+ </div>
84
+
85
+ <div class="observ-metric-card">
86
+ <div class="observ-metric-card__header">
87
+ <h3 class="observ-metric-card__label">Avg Cost/Call</h3>
88
+ </div>
89
+ <div class="observ-metric-card__body">
90
+ <p class="observ-metric-card__value"><%= format_currency(@metrics[:avg_cost_per_call]) %></p>
91
+ </div>
92
+ </div>
93
+ </section>
94
+
95
+ <div class="observ-dashboard__grid">
96
+ <section class="observ-card observ-card--span-2">
97
+ <header class="observ-card__header">
98
+ <h2 class="observ-card__title">Metrics by Agent</h2>
99
+ </header>
100
+ <div class="observ-card__body">
101
+ <% if @metrics_by_agent.any? %>
102
+ <table class="observ-table observ-table--compact">
103
+ <thead class="observ-table__header">
104
+ <tr class="observ-table__row">
105
+ <th class="observ-table__cell">Agent</th>
106
+ <th class="observ-table__cell observ-table__cell--numeric">Sessions</th>
107
+ <th class="observ-table__cell observ-table__cell--numeric">Traces</th>
108
+ <th class="observ-table__cell observ-table__cell--numeric">LLM Calls</th>
109
+ <th class="observ-table__cell observ-table__cell--numeric">Tokens</th>
110
+ <th class="observ-table__cell observ-table__cell--numeric">Cost</th>
111
+ </tr>
112
+ </thead>
113
+ <tbody>
114
+ <% @metrics_by_agent.each do |metrics| %>
115
+ <tr class="observ-table__row">
116
+ <td class="observ-table__cell">
117
+ <%= metrics[:agent_type] %>
118
+ </td>
119
+ <td class="observ-table__cell observ-table__cell--numeric">
120
+ <%= format_number(metrics[:sessions]) %>
121
+ </td>
122
+ <td class="observ-table__cell observ-table__cell--numeric">
123
+ <%= format_number(metrics[:traces]) %>
124
+ </td>
125
+ <td class="observ-table__cell observ-table__cell--numeric">
126
+ <%= format_number(metrics[:llm_calls]) %>
127
+ </td>
128
+ <td class="observ-table__cell observ-table__cell--numeric">
129
+ <%= format_tokens(metrics[:tokens]) %>
130
+ </td>
131
+ <td class="observ-table__cell observ-table__cell--numeric">
132
+ <%= format_currency(metrics[:cost]) %>
133
+ </td>
134
+ </tr>
135
+ <% end %>
136
+ </tbody>
137
+ </table>
138
+ <% else %>
139
+ <p class="observ-card__empty">No agent metrics available for this period.</p>
140
+ <% end %>
141
+ </div>
142
+ </section>
143
+
144
+ <section class="observ-card observ-card--span-2">
145
+ <header class="observ-card__header">
146
+ <h2 class="observ-card__title">Cost by Model</h2>
147
+ </header>
148
+ <div class="observ-card__body">
149
+ <% if @cost_by_model.any? %>
150
+ <table class="observ-table observ-table--compact">
151
+ <thead class="observ-table__header">
152
+ <tr class="observ-table__row">
153
+ <th class="observ-table__cell">Model</th>
154
+ <th class="observ-table__cell observ-table__cell--numeric">Cost</th>
155
+ <th class="observ-table__cell observ-table__cell--numeric">Percentage</th>
156
+ </tr>
157
+ </thead>
158
+ <tbody>
159
+ <%# Grand total across all models; used as the denominator for each row's share. %><% total_cost = @cost_by_model.values.sum %>
160
+ <%# Rows are listed from the most expensive model to the least expensive. %><% @cost_by_model.sort_by { |_, cost| -cost }.each do |model, cost| %>
161
+ <tr class="observ-table__row">
162
+ <td class="observ-table__cell">
163
+ <%= observ_model_badge(model) %>
164
+ </td>
165
+ <td class="observ-table__cell observ-table__cell--numeric">
166
+ <%= format_currency(cost) %>
167
+ </td>
168
+ <td class="observ-table__cell observ-table__cell--numeric">
169
+ <%# Report a 0 share instead of dividing by zero when the period's total cost is zero. %><%= observ_percentage(total_cost.zero? ? 0 : (cost / total_cost) * 100) %>
170
+ </td>
171
+ </tr>
172
+ <% end %>
173
+ </tbody>
174
+ </table>
175
+ <% else %>
176
+ <p class="observ-card__empty">No cost data available for this period.</p>
177
+ <% end %>
178
+ </div>
179
+ </section>
180
+
181
+ <section class="observ-card observ-card--span-2">
182
+ <header class="observ-card__header">
183
+ <h2 class="observ-card__title">Recent Sessions</h2>
184
+ <div class="observ-card__actions">
185
+ <%= link_to "View All →", sessions_path, class: "observ-link" %>
186
+ </div>
187
+ </header>
188
+ <div class="observ-card__body">
189
+ <% if @recent_sessions.any? %>
190
+ <table class="observ-table observ-table--compact">
191
+ <thead class="observ-table__header">
192
+ <tr class="observ-table__row">
193
+ <th class="observ-table__cell">Session ID</th>
194
+ <th class="observ-table__cell">Agent</th>
195
+ <th class="observ-table__cell">Started</th>
196
+ <th class="observ-table__cell observ-table__cell--numeric">Traces</th>
197
+ <th class="observ-table__cell observ-table__cell--numeric">Cost</th>
198
+ <th class="observ-table__cell">Status</th>
199
+ <th class="observ-table__cell"></th>
200
+ </tr>
201
+ </thead>
202
+ <tbody>
203
+ <% @recent_sessions.each do |session| %>
204
+ <tr class="observ-table__row">
205
+ <td class="observ-table__cell">
206
+ <code class="observ-code observ-code--inline"><%= truncate_id(session.session_id, 12) %></code>
207
+ </td>
208
+ <td class="observ-table__cell">
209
+ <%= session.metadata&.dig("agent_type") || "—" %>
210
+ </td>
211
+ <td class="observ-table__cell">
212
+ <%= observ_relative_time(session.start_time) %>
213
+ </td>
214
+ <td class="observ-table__cell observ-table__cell--numeric">
215
+ <%= session.end_time ? session.total_traces_count : session.traces.count %>
216
+ </td>
217
+ <td class="observ-table__cell observ-table__cell--numeric">
218
+ <%= format_currency(session.end_time ? session.total_cost : session.traces.sum(:total_cost)) %>
219
+ </td>
220
+ <td class="observ-table__cell">
221
+ <%= observ_status_badge(observ_session_status(session)) %>
222
+ </td>
223
+ <td class="observ-table__cell observ-table__cell--actions">
224
+ <%= link_to "View", session_path(session), class: "observ-button observ-button--sm" %>
225
+ </td>
226
+ </tr>
227
+ <% end %>
228
+ </tbody>
229
+ </table>
230
+ <% else %>
231
+ <p class="observ-card__empty">No sessions found for this period.</p>
232
+ <% end %>
233
+ </div>
234
+ </section>
235
+ </div>
236
+ </div>
@@ -0,0 +1,49 @@
1
+ <%= form_with model: item, scope: :observ_dataset_item, url: item.persisted? ? dataset_item_path(dataset, item) : dataset_items_path(dataset),
2
+ method: item.persisted? ? :patch : :post, class: "observ-form" do |f| %>
3
+ <% if item.errors.any? %>
4
+ <div class="observ-alert observ-alert--danger">
5
+ <h3 class="observ-alert__title">Please fix the following errors:</h3>
6
+ <ul class="observ-alert__list">
7
+ <% item.errors.full_messages.each do |message| %>
8
+ <li><%= message %></li>
9
+ <% end %>
10
+ </ul>
11
+ </div>
12
+ <% end %>
13
+
14
+ <div class="observ-form__group">
15
+ <%= f.label :input_text, "Input", class: "observ-form__label" %>
16
+ <%= f.text_area :input_text,
17
+ value: item.input.is_a?(Hash) ? JSON.pretty_generate(item.input) : item.input,
18
+ class: "observ-form__textarea observ-form__textarea--code",
19
+ rows: 6,
20
+ placeholder: '{"text": "What is the capital of France?"}',
21
+ required: true %>
22
+ <p class="observ-form__hint">The input data for this test case. Can be JSON or plain text.</p>
23
+ </div>
24
+
25
+ <div class="observ-form__group">
26
+ <%= f.label :expected_output_text, "Expected Output", class: "observ-form__label" %>
27
+ <%= f.text_area :expected_output_text,
28
+ value: item.expected_output.is_a?(Hash) ? JSON.pretty_generate(item.expected_output) : item.expected_output,
29
+ class: "observ-form__textarea observ-form__textarea--code",
30
+ rows: 6,
31
+ placeholder: '{"answer": "Paris"}' %>
32
+ <p class="observ-form__hint">Optional. The expected output for comparison. Can be JSON or plain text.</p>
33
+ </div>
34
+
35
+ <div class="observ-form__group">
36
+ <%= f.label :status, class: "observ-form__label" %>
37
+ <%= f.select :status,
38
+ options_for_select([["Active", "active"], ["Archived", "archived"]], item.status),
39
+ {},
40
+ class: "observ-form__select" %>
41
+ <p class="observ-form__hint">Archived items are excluded from new runs</p>
42
+ </div>
43
+
44
+ <div class="observ-form__actions">
45
+ <%= f.submit item.persisted? ? "Update Item" : "Add Item",
46
+ class: "observ-button observ-button--primary" %>
47
+ <%= link_to "Cancel", dataset_path(dataset, tab: "items"), class: "observ-button" %>
48
+ </div>
49
+ <% end %>
@@ -0,0 +1,18 @@
1
+ <% content_for :title, "Edit Item - #{@dataset.name}" %>
2
+
3
+ <% content_for :page_header do %>
4
+ <div class="observ-page-header__content">
5
+ <div>
6
+ <%= link_to "← Back to Dataset", dataset_path(@dataset, tab: "items"), class: "observ-datasets__back-link" %>
7
+ <h1 class="observ-page-header__title">Edit Item</h1>
8
+ </div>
9
+ </div>
10
+ <% end %>
11
+
12
+ <div class="observ-container">
13
+ <section class="observ-card">
14
+ <div class="observ-card__body">
15
+ <%= render "form", item: @item, dataset: @dataset %>
16
+ </div>
17
+ </section>
18
+ </div>
@@ -0,0 +1,95 @@
1
+ <% content_for :title, "Items - #{@dataset.name}" %>
2
+
3
+ <% content_for :page_header do %>
4
+ <div class="observ-page-header__content">
5
+ <div>
6
+ <%= link_to "← Back to Dataset", dataset_path(@dataset), class: "observ-datasets__back-link" %>
7
+ <h1 class="observ-page-header__title">Items: <%= @dataset.name %></h1>
8
+ </div>
9
+ <%= link_to "Add Item", new_dataset_item_path(@dataset), class: "observ-button observ-button--primary" %>
10
+ </div>
11
+ <% end %>
12
+
13
+ <div class="observ-container">
14
+ <!-- Filters -->
15
+ <section class="observ-card">
16
+ <div class="observ-card__body">
17
+ <%= form_with url: dataset_items_path(@dataset), method: :get, class: "observ-datasets-filters__form" do |f| %>
18
+ <div class="observ-datasets-filters__field">
19
+ <%= f.label :status, "Filter by status", class: "observ-datasets-filters__label" %>
20
+ <%= f.select :status,
21
+ options_for_select([["All", ""], ["Active", "active"], ["Archived", "archived"]], params[:status]),
22
+ {},
23
+ class: "observ-datasets-filters__select" %>
24
+ </div>
25
+ <div class="observ-datasets-filters__actions">
26
+ <%= f.submit "Filter", class: "observ-button observ-button--secondary" %>
27
+ <%= link_to "Clear", dataset_items_path(@dataset), class: "observ-button" %>
28
+ </div>
29
+ <% end %>
30
+ </div>
31
+ </section>
32
+
33
+ <!-- Items Table -->
34
+ <section class="observ-card">
35
+ <div class="observ-card__body">
36
+ <% if @items.any? %>
37
+ <table class="observ-table">
38
+ <thead class="observ-table__header">
39
+ <tr class="observ-table__row">
40
+ <th class="observ-table__cell">Input</th>
41
+ <th class="observ-table__cell">Expected Output</th>
42
+ <th class="observ-table__cell">Status</th>
43
+ <th class="observ-table__cell observ-table__cell--numeric">Runs</th>
44
+ <th class="observ-table__cell">Created</th>
45
+ <th class="observ-table__cell observ-table__cell--actions"></th>
46
+ </tr>
47
+ </thead>
48
+ <tbody>
49
+ <% @items.each do |item| %>
50
+ <tr class="observ-table__row">
51
+ <td class="observ-table__cell observ-datasets__cell--preview">
52
+ <code class="observ-datasets__preview"><%= item.input_preview(max_length: 100) %></code>
53
+ </td>
54
+ <td class="observ-table__cell observ-datasets__cell--preview">
55
+ <% if item.expected_output.present? %>
56
+ <code class="observ-datasets__preview"><%= item.expected_output_preview(max_length: 100) %></code>
57
+ <% else %>
58
+ <span class="observ-text--muted">Not set</span>
59
+ <% end %>
60
+ </td>
61
+ <td class="observ-table__cell">
62
+ <span class="observ-badge <%= item.active? ? 'observ-badge--success' : 'observ-badge--default' %>">
63
+ <%= item.status %>
64
+ </span>
65
+ </td>
66
+ <td class="observ-table__cell observ-table__cell--numeric">
67
+ <%= item.run_count %>
68
+ </td>
69
+ <td class="observ-table__cell">
70
+ <%= time_ago_in_words(item.created_at) %> ago
71
+ </td>
72
+ <td class="observ-table__cell observ-table__cell--actions">
73
+ <div class="observ-datasets-table__action-group">
74
+ <%= link_to "Edit", edit_dataset_item_path(@dataset, item), class: "observ-button observ-button--sm" %>
75
+ <%# Confirmation is declared twice: data-confirm on the button for rails-ujs, and data-turbo-confirm on the generated form for Turbo, which does not read data-confirm. %><%= button_to "Delete", dataset_item_path(@dataset, item),
76
+ method: :delete,
77
+ class: "observ-button observ-button--sm observ-button--danger",
78
+ data: { confirm: "Are you sure?" }, form: { data: { turbo_confirm: "Are you sure?" } } %>
79
+ </div>
80
+ </td>
81
+ </tr>
82
+ <% end %>
83
+ </tbody>
84
+ </table>
85
+ <% else %>
86
+ <div class="observ-card__empty">
87
+ <p class="observ-card__empty-text">No items found</p>
88
+ <%= link_to "Add an item", new_dataset_item_path(@dataset), class: "observ-button observ-button--primary" %>
89
+ </div>
90
+ <% end %>
91
+ </div>
92
+ </section>
93
+
94
+ <%= observ_pagination(@items) %>
95
+ </div>
@@ -0,0 +1,18 @@
1
+ <% content_for :title, "Add Item - #{@dataset.name}" %>
2
+
3
+ <% content_for :page_header do %>
4
+ <div class="observ-page-header__content">
5
+ <div>
6
+ <%= link_to "← Back to Dataset", dataset_path(@dataset, tab: "items"), class: "observ-datasets__back-link" %>
7
+ <h1 class="observ-page-header__title">Add Item to <%= @dataset.name %></h1>
8
+ </div>
9
+ </div>
10
+ <% end %>
11
+
12
+ <div class="observ-container">
13
+ <section class="observ-card">
14
+ <div class="observ-card__body">
15
+ <%= render "form", item: @item, dataset: @dataset %>
16
+ </div>
17
+ </section>
18
+ </div>
@@ -0,0 +1,4 @@
1
+ <script>
2
+ // Close the drawer after successful score save
3
+ document.querySelector('.observ-drawer')?.classList.remove('open');
4
+ </script>
@@ -0,0 +1,75 @@
1
+ <div class="observ-drawer__header">
2
+ <h2 class="observ-drawer__title">Score Item</h2>
3
+ </div>
4
+ <%# Drawer content for manually scoring a run item. Uses a run_item local and an optional error local (checked through local_assigns). %>
5
+ <div class="observ-drawer__body">
6
+ <% if local_assigns[:error] %>
7
+ <div class="observ-alert observ-alert--danger"><%= error %></div>
8
+ <% end %>
9
+ <%# Context for the reviewer: input, expected output (only when present) and actual output. %>
10
+ <div class="observ-datasets__score-context">
11
+ <h4 class="observ-text--label">Input</h4>
12
+ <pre class="observ-code-block"><%= format_trace_data(run_item.input) %></pre>
13
+
14
+ <% if run_item.expected_output.present? %>
15
+ <h4 class="observ-text--label">Expected Output</h4>
16
+ <pre class="observ-code-block"><%= format_trace_data(run_item.expected_output) %></pre>
17
+ <% end %>
18
+
19
+ <h4 class="observ-text--label">Actual Output</h4>
20
+ <pre class="observ-code-block"><%= format_trace_data(run_item.actual_output) %></pre>
21
+ </div>
22
+ <%# Existing score named "manual" with source :manual, used to pre-fill the form; presumably nil when none exists (hence the &. calls below) - confirm against score_for. %>
23
+ <% existing_manual = run_item.score_for("manual", source: :manual) %>
24
+ <%# POSTs two un-namespaced fields: value ("1" = correct, "0" = incorrect) and comment. %>
25
+ <%= form_with url: score_dataset_run_run_item_path(run_item.dataset_run.dataset, run_item.dataset_run, run_item), method: :post, class: "observ-form" do |f| %>
26
+ <div class="observ-form__group">
27
+ <label class="observ-form__label">Is the output correct?</label>
28
+ <div class="observ-datasets__score-buttons">
29
+ <label class="observ-datasets__score-button">
30
+ <input type="radio" name="value" value="1" <%= "checked" if existing_manual&.passed? %>>
31
+ <span class="observ-datasets__score-icon observ-datasets__score-icon--pass">&#10003;</span>
32
+ Correct
33
+ </label>
34
+ <label class="observ-datasets__score-button">
35
+ <input type="radio" name="value" value="0" <%= "checked" if existing_manual&.failed? %>>
36
+ <span class="observ-datasets__score-icon observ-datasets__score-icon--fail">&#10005;</span>
37
+ Incorrect
38
+ </label>
39
+ </div>
40
+ </div>
41
+
42
+ <div class="observ-form__group">
43
+ <label class="observ-form__label" for="comment">Comment (optional)</label>
44
+ <textarea name="comment" id="comment" class="observ-form__textarea" rows="3"><%= existing_manual&.comment %></textarea>
45
+ </div>
46
+
47
+ <div class="observ-form__actions">
48
+ <button type="submit" class="observ-button observ-button--primary">Save Score</button>
49
+ <button type="button" class="observ-button" data-action="click->observ--drawer#close">Cancel</button>
50
+ </div>
51
+ <% end %>
52
+ <%# Lists every score on the run item; the display value is shown only for non-boolean scores and the comment only when present. %>
53
+ <% if run_item.scores.any? %>
54
+ <div class="observ-datasets__existing-scores">
55
+ <h4 class="observ-text--label">Existing Scores</h4>
56
+ <ul class="observ-scores-list">
57
+ <% run_item.scores.each do |score| %>
58
+ <li class="observ-scores-list__item">
59
+ <span class="observ-scores-list__indicator <%= score.passed? ? 'observ-scores-list__indicator--pass' : 'observ-scores-list__indicator--fail' %>">
60
+ <%= score.passed? ? '✓' : '✗' %>
61
+ </span>
62
+ <span class="observ-scores-list__name"><%= score.name %></span>
63
+ <% unless score.boolean? %>
64
+ <span class="observ-scores-list__value"><%= score.display_value %></span>
65
+ <% end %>
66
+ <span class="observ-scores-list__source"><%= score.source %></span>
67
+ <% if score.comment.present? %>
68
+ <span class="observ-scores-list__comment"><%= score.comment %></span>
69
+ <% end %>
70
+ </li>
71
+ <% end %>
72
+ </ul>
73
+ </div>
74
+ <% end %>
75
+ </div>
@@ -0,0 +1,29 @@
1
+ <div class="observ-drawer__body"><%# Drawer content shown after a score has been saved; uses a score local. %>
2
+ <div class="observ-alert observ-alert--success">
3
+ <strong>Score saved successfully!</strong>
4
+ <p>The item has been marked as <%= score.passed? ? "correct" : "incorrect" %>.</p>
5
+ </div>
6
+ <%# Summary of the saved score: a badge with its display value, plus the comment when one was given. %>
7
+ <div class="observ-datasets__score-result">
8
+ <div class="observ-drawer__field">
9
+ <label class="observ-drawer__field-label">Score</label>
10
+ <span class="observ-badge <%= score.badge_class %>"><%= score.display_value %></span>
11
+ </div>
12
+
13
+ <% if score.comment.present? %>
14
+ <div class="observ-drawer__field">
15
+ <label class="observ-drawer__field-label">Comment</label>
16
+ <p><%= score.comment %></p>
17
+ </div>
18
+ <% end %>
19
+ </div>
20
+ <%# A real button (not a link to "#") closes the drawer, so the click cannot change the URL hash or scroll the page. %>
21
+ <div class="observ-drawer__actions">
22
+ <button
23
+ type="button"
24
+ class="observ-button observ-button--primary"
25
+ data-action="click->observ--drawer#close">
26
+ Score Another Item
27
+ </button>
28
+ </div>
29
+ </div>
@@ -0,0 +1,19 @@
1
+ <td class="observ-table__cell" id="run-item-<%= run_item.id %>-scores"><%# Compact scores cell for a run item row: shows up to three scores, then a "+N" marker for the remainder, or "-" when there are none. %>
2
+ <% if (scores = run_item.scores.to_a).any? %><%# Load the association once (reusing a preloaded target if there is one) instead of issuing separate exists/limit/count queries for every row. %>
3
+ <ul class="observ-scores-list observ-scores-list--compact">
4
+ <% scores.first(3).each do |score| %>
5
+ <li class="observ-scores-list__item" title="<%= score.source %>">
6
+ <span class="observ-scores-list__indicator <%= score.passed? ? 'observ-scores-list__indicator--pass' : 'observ-scores-list__indicator--fail' %>">
7
+ <%= score.passed? ? '✓' : '✗' %>
8
+ </span>
9
+ <span class="observ-scores-list__name"><%= score.name %></span>
10
+ </li>
11
+ <% end %>
12
+ <% if scores.size > 3 %>
13
+ <li class="observ-scores-list__item observ-scores-list__more">+<%= scores.size - 3 %></li>
14
+ <% end %>
15
+ </ul>
16
+ <% else %>
17
+ <span class="observ-text--muted">-</span>
18
+ <% end %>
19
+ </td>
@@ -0,0 +1,80 @@
1
+ <%= turbo_stream.update "drawer-header-title" do %>
2
+ Run Item Details
3
+ <% end %>
4
+ <%# Turbo Stream response: updates the "drawer-header-title" target above, then fills the "drawer-content" target with the details of @run_item. %>
5
+ <%= turbo_stream.update "drawer-content" do %>
6
+ <div class="observ-drawer__section">
7
+ <!-- Status -->
8
+ <div class="observ-drawer__field">
9
+ <label class="observ-drawer__field-label">Status</label>
10
+ <div class="observ-drawer__field-value">
11
+ <span class="observ-badge <%= run_item_status_badge_class(@run_item.status) %>">
12
+ <%= @run_item.status %>
13
+ </span>
14
+ <% if @run_item.output_matches? == true %>
15
+ <span class="observ-badge observ-badge--success">Match</span>
16
+ <% elsif @run_item.output_matches? == false %>
17
+ <span class="observ-badge observ-badge--danger">Mismatch</span>
18
+ <% end %>
19
+ </div>
20
+ </div>
21
+ <%# The Match/Mismatch badge above appears only when output_matches? is exactly true or false; any other value renders no badge. Metrics below are shown only for succeeded run items, with a missing cost or token count rendered as 0. %>
22
+ <!-- Metrics -->
23
+ <% if @run_item.succeeded? %>
24
+ <div class="observ-drawer__metrics">
25
+ <div class="observ-drawer__metric">
26
+ <span class="observ-drawer__metric-label">Cost</span>
27
+ <span class="observ-drawer__metric-value">$<%= number_with_precision(@run_item.cost || 0, precision: 4) %></span>
28
+ </div>
29
+ <div class="observ-drawer__metric">
30
+ <span class="observ-drawer__metric-label">Tokens</span>
31
+ <span class="observ-drawer__metric-value"><%= number_with_delimiter(@run_item.tokens || 0) %></span>
32
+ </div>
33
+ <div class="observ-drawer__metric">
34
+ <span class="observ-drawer__metric-label">Duration</span>
35
+ <span class="observ-drawer__metric-value"><%= @run_item.duration_ms ? "#{@run_item.duration_ms}ms" : "-" %></span>
36
+ </div>
37
+ </div>
38
+ <% end %>
39
+
40
+ <!-- Input -->
41
+ <div class="observ-drawer__field">
42
+ <label class="observ-drawer__field-label">Input</label>
43
+ <pre class="observ-code-block"><%= format_trace_data(@run_item.input) %></pre>
44
+ </div>
45
+
46
+ <!-- Expected Output -->
47
+ <div class="observ-drawer__field">
48
+ <label class="observ-drawer__field-label">Expected Output</label>
49
+ <% if @run_item.expected_output.present? %>
50
+ <pre class="observ-code-block"><%= format_trace_data(@run_item.expected_output) %></pre>
51
+ <% else %>
52
+ <p class="observ-text--muted">No expected output defined</p>
53
+ <% end %>
54
+ </div>
55
+ <%# Actual output: a failed run item shows its error; otherwise the output when present; otherwise a pending placeholder. %>
56
+ <!-- Actual Output -->
57
+ <div class="observ-drawer__field">
58
+ <label class="observ-drawer__field-label">Actual Output</label>
59
+ <% if @run_item.failed? %>
60
+ <div class="observ-alert observ-alert--danger">
61
+ <strong>Error:</strong> <%= @run_item.error %>
62
+ </div>
63
+ <% elsif @run_item.actual_output.present? %>
64
+ <pre class="observ-code-block"><%= format_trace_data(@run_item.actual_output) %></pre>
65
+ <% else %>
66
+ <p class="observ-text--muted">Pending execution</p>
67
+ <% end %>
68
+ </div>
69
+ <%# The trace link is rendered only when the run item has an associated trace; the Close button is always rendered. %>
70
+ <!-- Actions -->
71
+ <div class="observ-drawer__actions">
72
+ <% if @run_item.trace %>
73
+ <%= link_to "View Full Trace", trace_path(@run_item.trace), class: "observ-button observ-button--primary" %>
74
+ <% end %>
75
+ <button type="button" class="observ-button" data-action="click->observ--drawer#close">
76
+ Close
77
+ </button>
78
+ </div>
79
+ </div>
80
+ <% end %>
@@ -0,0 +1,7 @@
1
+ <%= turbo_stream.update "drawer-header-title" do %>
2
+ Score Item
3
+ <% end %>
4
+ <%# Turbo Stream response: updates the drawer title above, then fills the "drawer-content" target with the score_drawer partial rendered for @run_item. %>
5
+ <%= turbo_stream.update "drawer-content" do %>
6
+ <%= render "observ/dataset_run_items/score_drawer", run_item: @run_item %>
7
+ <% end %>