rubyllm-observ 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +778 -0
- data/Rakefile +49 -0
- data/app/assets/javascripts/observ/application.js +12 -0
- data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
- data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
- data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
- data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
- data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
- data/app/assets/javascripts/observ/controllers/index.js +52 -0
- data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
- data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
- data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
- data/app/assets/stylesheets/observ/_annotations.scss +127 -0
- data/app/assets/stylesheets/observ/_card.scss +52 -0
- data/app/assets/stylesheets/observ/_chat.scss +156 -0
- data/app/assets/stylesheets/observ/_components.scss +460 -0
- data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
- data/app/assets/stylesheets/observ/_datasets.scss +697 -0
- data/app/assets/stylesheets/observ/_drawer.scss +273 -0
- data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
- data/app/assets/stylesheets/observ/_layout.scss +256 -0
- data/app/assets/stylesheets/observ/_metrics.scss +99 -0
- data/app/assets/stylesheets/observ/_observations.scss +160 -0
- data/app/assets/stylesheets/observ/_pagination.scss +143 -0
- data/app/assets/stylesheets/observ/_prompts.scss +365 -0
- data/app/assets/stylesheets/observ/_table.scss +53 -0
- data/app/assets/stylesheets/observ/_variables.scss +53 -0
- data/app/assets/stylesheets/observ/application.scss +15 -0
- data/app/controllers/observ/annotations_controller.rb +144 -0
- data/app/controllers/observ/application_controller.rb +8 -0
- data/app/controllers/observ/chats_controller.rb +58 -0
- data/app/controllers/observ/dashboard_controller.rb +159 -0
- data/app/controllers/observ/dataset_items_controller.rb +85 -0
- data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
- data/app/controllers/observ/dataset_runs_controller.rb +110 -0
- data/app/controllers/observ/datasets_controller.rb +74 -0
- data/app/controllers/observ/messages_controller.rb +26 -0
- data/app/controllers/observ/observations_controller.rb +59 -0
- data/app/controllers/observ/prompt_versions_controller.rb +148 -0
- data/app/controllers/observ/prompts_controller.rb +205 -0
- data/app/controllers/observ/sessions_controller.rb +45 -0
- data/app/controllers/observ/traces_controller.rb +86 -0
- data/app/forms/observ/prompt_form.rb +96 -0
- data/app/helpers/observ/application_helper.rb +9 -0
- data/app/helpers/observ/chats_helper.rb +47 -0
- data/app/helpers/observ/dashboard_helper.rb +154 -0
- data/app/helpers/observ/datasets_helper.rb +62 -0
- data/app/helpers/observ/pagination_helper.rb +38 -0
- data/app/jobs/observ/application_job.rb +4 -0
- data/app/jobs/observ/dataset_runner_job.rb +49 -0
- data/app/mailers/observ/application_mailer.rb +6 -0
- data/app/models/concerns/observ/agent_phaseable.rb +124 -0
- data/app/models/concerns/observ/agent_selectable.rb +50 -0
- data/app/models/concerns/observ/chat_enhancements.rb +109 -0
- data/app/models/concerns/observ/message_enhancements.rb +31 -0
- data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
- data/app/models/concerns/observ/prompt_management.rb +320 -0
- data/app/models/concerns/observ/trace_association.rb +9 -0
- data/app/models/observ/annotation.rb +23 -0
- data/app/models/observ/application_record.rb +5 -0
- data/app/models/observ/dataset.rb +51 -0
- data/app/models/observ/dataset_item.rb +41 -0
- data/app/models/observ/dataset_run.rb +104 -0
- data/app/models/observ/dataset_run_item.rb +111 -0
- data/app/models/observ/generation.rb +56 -0
- data/app/models/observ/null_prompt.rb +59 -0
- data/app/models/observ/observation.rb +38 -0
- data/app/models/observ/prompt.rb +315 -0
- data/app/models/observ/score.rb +51 -0
- data/app/models/observ/session.rb +131 -0
- data/app/models/observ/span.rb +13 -0
- data/app/models/observ/trace.rb +135 -0
- data/app/presenters/observ/agent_select_presenter.rb +59 -0
- data/app/services/observ/agent_executor_service.rb +174 -0
- data/app/services/observ/agent_provider.rb +60 -0
- data/app/services/observ/agent_selection_service.rb +53 -0
- data/app/services/observ/chat_instrumenter.rb +523 -0
- data/app/services/observ/dataset_runner_service.rb +153 -0
- data/app/services/observ/evaluator_runner_service.rb +58 -0
- data/app/services/observ/evaluators/base_evaluator.rb +51 -0
- data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
- data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
- data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
- data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
- data/app/services/observ/prompt_manager/caching.rb +167 -0
- data/app/services/observ/prompt_manager/comparison.rb +49 -0
- data/app/services/observ/prompt_manager/version_management.rb +96 -0
- data/app/services/observ/prompt_manager.rb +40 -0
- data/app/services/observ/trace_text_formatter.rb +349 -0
- data/app/validators/observ/prompt_config_validator.rb +187 -0
- data/app/views/kaminari/_first_page.html.erb +11 -0
- data/app/views/kaminari/_gap.html.erb +8 -0
- data/app/views/kaminari/_last_page.html.erb +11 -0
- data/app/views/kaminari/_next_page.html.erb +11 -0
- data/app/views/kaminari/_page.html.erb +12 -0
- data/app/views/kaminari/_paginator.html.erb +25 -0
- data/app/views/kaminari/_prev_page.html.erb +11 -0
- data/app/views/kaminari/observ/_first_page.html.erb +11 -0
- data/app/views/kaminari/observ/_gap.html.erb +8 -0
- data/app/views/kaminari/observ/_last_page.html.erb +11 -0
- data/app/views/kaminari/observ/_next_page.html.erb +11 -0
- data/app/views/kaminari/observ/_page.html.erb +12 -0
- data/app/views/kaminari/observ/_paginator.html.erb +25 -0
- data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
- data/app/views/layouts/observ/application.html.erb +88 -0
- data/app/views/observ/annotations/_annotation.html.erb +13 -0
- data/app/views/observ/annotations/_form.html.erb +28 -0
- data/app/views/observ/annotations/index.html.erb +28 -0
- data/app/views/observ/annotations/sessions_index.html.erb +48 -0
- data/app/views/observ/annotations/traces_index.html.erb +48 -0
- data/app/views/observ/chats/_form.html.erb +45 -0
- data/app/views/observ/chats/index.html.erb +67 -0
- data/app/views/observ/chats/new.html.erb +17 -0
- data/app/views/observ/chats/show.html.erb +34 -0
- data/app/views/observ/dashboard/index.html.erb +236 -0
- data/app/views/observ/dataset_items/_form.html.erb +49 -0
- data/app/views/observ/dataset_items/edit.html.erb +18 -0
- data/app/views/observ/dataset_items/index.html.erb +95 -0
- data/app/views/observ/dataset_items/new.html.erb +18 -0
- data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
- data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
- data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
- data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
- data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
- data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
- data/app/views/observ/dataset_runs/index.html.erb +108 -0
- data/app/views/observ/dataset_runs/new.html.erb +57 -0
- data/app/views/observ/dataset_runs/review.html.erb +155 -0
- data/app/views/observ/dataset_runs/show.html.erb +166 -0
- data/app/views/observ/datasets/_form.html.erb +62 -0
- data/app/views/observ/datasets/_items_tab.html.erb +66 -0
- data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
- data/app/views/observ/datasets/edit.html.erb +32 -0
- data/app/views/observ/datasets/index.html.erb +105 -0
- data/app/views/observ/datasets/new.html.erb +18 -0
- data/app/views/observ/datasets/show.html.erb +67 -0
- data/app/views/observ/messages/_content.html.erb +1 -0
- data/app/views/observ/messages/_form.html.erb +33 -0
- data/app/views/observ/messages/_message.html.erb +14 -0
- data/app/views/observ/messages/_tool_calls.html.erb +10 -0
- data/app/views/observ/messages/create.turbo_stream.erb +9 -0
- data/app/views/observ/observations/index.html.erb +97 -0
- data/app/views/observ/observations/show_generation.html.erb +195 -0
- data/app/views/observ/observations/show_span.html.erb +93 -0
- data/app/views/observ/prompts/_diff_content.html.erb +16 -0
- data/app/views/observ/prompts/_form.html.erb +111 -0
- data/app/views/observ/prompts/_new_form.html.erb +102 -0
- data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
- data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
- data/app/views/observ/prompts/_version_actions.html.erb +40 -0
- data/app/views/observ/prompts/compare.html.erb +155 -0
- data/app/views/observ/prompts/edit.html.erb +17 -0
- data/app/views/observ/prompts/index.html.erb +108 -0
- data/app/views/observ/prompts/new.html.erb +17 -0
- data/app/views/observ/prompts/show.html.erb +138 -0
- data/app/views/observ/prompts/versions.html.erb +87 -0
- data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
- data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
- data/app/views/observ/sessions/index.html.erb +91 -0
- data/app/views/observ/sessions/show.html.erb +251 -0
- data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
- data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
- data/app/views/observ/traces/index.html.erb +87 -0
- data/app/views/observ/traces/show.html.erb +285 -0
- data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
- data/app/views/shared/_drawer.html.erb +26 -0
- data/config/routes.rb +80 -0
- data/db/migrate/001_create_observ_sessions.rb +21 -0
- data/db/migrate/002_create_observ_traces.rb +25 -0
- data/db/migrate/003_create_observ_observations.rb +42 -0
- data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
- data/db/migrate/005_create_observ_prompts.rb +21 -0
- data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
- data/db/migrate/007_create_observ_annotations.rb +12 -0
- data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
- data/db/migrate/010_create_observ_datasets.rb +15 -0
- data/db/migrate/011_create_observ_dataset_items.rb +17 -0
- data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
- data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
- data/db/migrate/014_create_observ_scores.rb +26 -0
- data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
- data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
- data/lib/generators/observ/install/USAGE +27 -0
- data/lib/generators/observ/install/install_generator.rb +270 -0
- data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
- data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
- data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
- data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
- data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
- data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
- data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
- data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
- data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
- data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
- data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
- data/lib/observ/asset_installer.rb +130 -0
- data/lib/observ/asset_syncer.rb +104 -0
- data/lib/observ/configuration.rb +108 -0
- data/lib/observ/engine.rb +50 -0
- data/lib/observ/index_file_generator.rb +142 -0
- data/lib/observ/instrumenter/ruby_llm.rb +6 -0
- data/lib/observ/version.rb +3 -0
- data/lib/observ.rb +29 -0
- data/lib/tasks/observ_tasks.rake +75 -0
- metadata +453 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Join record between one dataset run and one dataset item, optionally linked
  # to the trace/observation produced for that item and to its scores.
  #
  # Status is not stored; it is derived from +trace_id+ and +error+:
  # an error means failed, a trace without an error means succeeded,
  # anything else is pending.
  class DatasetRunItem < ApplicationRecord
    self.table_name = "observ_dataset_run_items"

    belongs_to :dataset_run, class_name: "Observ::DatasetRun", inverse_of: :run_items
    belongs_to :dataset_item, class_name: "Observ::DatasetItem", inverse_of: :run_items
    belongs_to :trace, class_name: "Observ::Trace", optional: true
    belongs_to :observation, class_name: "Observ::Observation", optional: true
    has_many :scores, class_name: "Observ::Score",
      foreign_key: :dataset_run_item_id, dependent: :destroy, inverse_of: :dataset_run_item

    # A dataset item may appear at most once per run.
    validates :dataset_run_id, uniqueness: { scope: :dataset_item_id }

    # Status scopes.
    # NOTE(review): these compare against NULL only, whereas the instance
    # predicates below treat an empty-string error as "no error" -- confirm
    # empty strings are never written to the error column.
    scope :succeeded, -> { where.not(trace_id: nil).where(error: nil) }
    scope :failed, -> { where.not(error: nil) }
    scope :pending, -> { where(trace_id: nil, error: nil) }

    # Status helpers

    # @return [Boolean] true when a trace is linked and no error is recorded
    def succeeded?
      trace_id.present? && error.blank?
    end

    # @return [Boolean] true when an error is recorded
    def failed?
      error.present?
    end

    # Defined as "neither failed nor succeeded" so that it always agrees with
    # #status (a blank-but-not-nil error no longer yields a row that is
    # simultaneously not failed, not succeeded and not pending).
    #
    # @return [Boolean]
    def pending?
      !failed? && !succeeded?
    end

    # @return [Symbol] :failed, :succeeded or :pending (failure takes precedence)
    def status
      return :failed if failed?
      return :succeeded if succeeded?

      :pending
    end

    # Access helpers

    # @return [Object] the input of the associated dataset item
    def input
      dataset_item.input
    end

    # @return [Object] the expected output of the associated dataset item
    def expected_output
      dataset_item.expected_output
    end

    # @return [Object, nil] the output of the linked trace, or nil without a trace
    def actual_output
      trace&.output
    end

    # Comparison helpers

    # Compares expected and actual output after normalization.
    #
    # @return [Boolean, nil] nil when either side is blank (nothing to compare),
    #   otherwise whether the normalized values are equal
    def output_matches?
      return nil if expected_output.blank? || actual_output.blank?

      normalize_for_comparison(expected_output) == normalize_for_comparison(actual_output)
    end

    # Metrics from trace (all nil when no trace is linked)

    def cost
      trace&.total_cost
    end

    def tokens
      trace&.total_tokens
    end

    def duration_ms
      trace&.duration_ms
    end

    # Score helpers

    # Most recently created score with the given name.
    #
    # @param name [String] score name
    # @param source [String, Symbol, nil] optional source filter
    # @return [Observ::Score, nil]
    def score_for(name, source: nil)
      scope = scores.where(name: name)
      scope = scope.where(source: source) if source
      scope.order(created_at: :desc).first
    end

    # @return [Boolean] true when at least one score exists
    def scored?
      scores.any?
    end

    # @return [Integer] number of scores with value >= 0.5
    def passing_scores_count
      scores.where("value >= 0.5").count
    end

    # @return [Integer] number of scores with value < 0.5
    def failing_scores_count
      scores.where("value < 0.5").count
    end

    private

    # Normalizes an output value so that a Hash and its JSON-string form
    # compare equal: hashes get symbolized keys, strings are parsed as JSON
    # when possible (falling back to the stripped string), and any other
    # value is returned unchanged.
    def normalize_for_comparison(output)
      case output
      when Hash
        output.deep_symbolize_keys
      when String
        begin
          parsed = JSON.parse(output)
          parsed.is_a?(Hash) ? parsed.deep_symbolize_keys : parsed
        rescue JSON::ParserError
          output.strip
        end
      else
        output
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Observation subtype that stores input/output payloads, messages, tool
  # definitions, token usage and cost.
  class Generation < Observation
    # Stores the input payload (non-strings are serialized to JSON).
    #
    # @param input [String, Object] payload to store
    # @param messages [Object, nil] replaces the stored messages when given
    def set_input(input, messages: nil)
      update!(
        input: serialize_payload(input),
        messages: messages || self.messages
      )
    end

    # @param messages [Object] value written to the messages attribute
    def set_messages(messages)
      update!(messages: messages)
    end

    # @param tools [Object] value written to the tools attribute
    # @param tool_choice [Object, nil] value written to the tool_choice attribute
    def set_tools(tools, tool_choice: nil)
      update!(tools: tools, tool_choice: tool_choice)
    end

    # Records the result and stamps +end_time+ with the current time.
    #
    # +usage+ and +provider_metadata+ are merged (with stringified keys) into
    # the values already stored; passing nil for either is treated as an empty
    # hash instead of raising.
    #
    # @param output [String, Object] result payload (non-strings become JSON)
    # @param usage [Hash, nil] usage counters to merge
    # @param cost_usd [Numeric]
    # @param status_message [String, nil]
    # @param finish_reason [String, nil]
    # @param completion_start_time [Time, nil]
    # @param provider_metadata [Hash, nil] metadata to merge
    # @param messages [Object, nil] replaces the stored messages when given
    # @param raw_response [Object, nil]
    # @raise [ActiveRecord::RecordInvalid] when the update fails validation
    def finalize(output:, usage: {}, cost_usd: 0.0, status_message: nil, finish_reason: nil,
                 completion_start_time: nil, provider_metadata: {}, messages: nil, raw_response: nil)
      merged_usage = (self.usage || {}).merge((usage || {}).stringify_keys)
      merged_provider_metadata = (self.provider_metadata || {}).merge((provider_metadata || {}).stringify_keys)

      update!(
        output: serialize_payload(output),
        usage: merged_usage,
        cost_usd: cost_usd,
        finish_reason: finish_reason,
        completion_start_time: completion_start_time,
        provider_metadata: merged_provider_metadata,
        messages: messages || self.messages,
        raw_response: raw_response,
        end_time: Time.current,
        status_message: status_message
      )
    end

    # @return [Float, nil] milliseconds between start_time and
    #   completion_start_time, or nil when either is missing
    def time_to_first_token_ms
      return nil unless completion_start_time && start_time

      ((completion_start_time - start_time) * 1000).round(2)
    end

    # @return [Integer] usage["total_tokens"], 0 when absent
    def total_tokens
      usage_count("total_tokens")
    end

    # @return [Integer] usage["input_tokens"], 0 when absent
    def input_tokens
      usage_count("input_tokens")
    end

    # @return [Integer] usage["output_tokens"], 0 when absent
    def output_tokens
      usage_count("output_tokens")
    end

    private

    # Strings are stored verbatim; everything else is serialized with #to_json.
    def serialize_payload(payload)
      payload.is_a?(String) ? payload : payload.to_json
    end

    # Reads one counter from the usage hash, defaulting to 0.
    def usage_count(key)
      usage&.dig(key) || 0
    end
  end
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Null Object stand-in for Prompt.
  #
  # Wraps a fallback text and answers the same read-only questions a prompt
  # record does, so callers can use it when no stored prompt is available.
  class NullPrompt
    attr_reader :name, :prompt, :config

    # @param name [String] name of the prompt that was requested
    # @param fallback_text [String, nil] text returned by #prompt and #compile
    def initialize(name:, fallback_text:)
      @name = name
      @prompt = fallback_text
      @config = {}
    end

    # @return [nil] a null prompt has no version
    def version
      nil
    end

    # Returns the fallback text untouched; +variables+ is accepted only for
    # interface compatibility and is ignored (no template rendering happens).
    #
    # @param variables [Hash] ignored
    # @return [String, nil]
    def compile(variables = {})
      @prompt
    end

    # @return [String] always "fallback"
    def state
      "fallback"
    end

    def draft?
      false
    end

    def production?
      false
    end

    def archived?
      false
    end

    def persisted?
      false
    end

    # @return [nil] a null prompt has no database id
    def id
      nil
    end

    # Short form for logging/debugging.
    def to_s
      "NullPrompt(#{name})"
    end

    # Debug representation showing at most the first 51 characters of the
    # fallback text. Safe to call when the fallback is nil or not a String,
    # so inspecting the object never raises.
    def inspect
      preview = prompt&.to_s&.slice(0..50)
      "#<Observ::NullPrompt name: #{name.inspect}, fallback: #{preview.inspect}...>"
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Base record for entries stored in observ_observations. Uses single-table
  # inheritance on the +type+ column; the only accepted types are
  # Observ::Generation and Observ::Span.
  class Observation < ApplicationRecord
    self.table_name = "observ_observations"
    self.inheritance_column = :type

    belongs_to :trace, class_name: "Observ::Trace", foreign_key: :observ_trace_id, inverse_of: :observations

    validates :observation_id, presence: true, uniqueness: true
    validates :start_time, presence: true
    validates :type, presence: true, inclusion: { in: %w[Observ::Generation Observ::Span] }

    # Ask the trace to recompute its aggregates whenever cost or usage changed.
    after_save :update_trace_metrics, if: :saved_change_to_cost_or_usage?

    # Stamps +end_time+ with the current time and stores the status message.
    #
    # @param status_message [String, nil]
    # @raise [ActiveRecord::RecordInvalid] when the update fails validation
    def finalize(status_message: nil)
      update!(
        end_time: Time.current,
        status_message: status_message
      )
    end

    # @return [Float, nil] elapsed milliseconds between start_time and
    #   end_time, or nil while either timestamp is missing (e.g. an unfinished
    #   observation or an unsaved record without a start_time)
    def duration_ms
      return nil unless end_time && start_time

      ((end_time - start_time) * 1000).round(2)
    end

    private

    def saved_change_to_cost_or_usage?
      saved_change_to_cost_usd? || saved_change_to_usage?
    end

    def update_trace_metrics
      trace&.update_aggregated_metrics
    end
  end
end
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "mustache"
|
|
4
|
+
|
|
5
|
+
module Observ
  # A named, versioned prompt template with a draft / production / archived
  # lifecycle managed by AASM. Only drafts may have their content edited, and
  # at most one version per name may be in production.
  class Prompt < ApplicationRecord
    include AASM

    self.table_name = "observ_prompts"

    # Matches a Mustache variable tag and captures its name. Accepts the plain
    # ({{name}}), padded ({{ name }}), and unescaped ({{{name}}}, {{&name}})
    # forms; rejects section, closing, inverted, comment, partial and
    # delimiter tags ({{#..}}, {{/..}}, {{^..}}, {{!..}}, {{>..}}, {{=..}}).
    VARIABLE_TAG_PATTERN = /\{\{(?:\{|&)?\s*([^#\^\/!<>\{&=\s\}][^}\s]*)\s*\}?\}\}/

    # Matches a whole (inverted) section, from {{#name}} / {{^name}} up to the
    # first matching {{/name}}, allowing padding whitespace and dotted names.
    SECTION_PATTERN = /\{\{[#\^]\s*([\w?!.\-]+)\s*\}\}.*?\{\{\/\s*\1\s*\}\}/m

    # ============================================
    # VALIDATIONS
    # ============================================
    validates :name, presence: true
    validates :prompt, presence: true
    validates :version, presence: true, numericality: { only_integer: true, greater_than: 0 }
    validates :state, presence: true, inclusion: { in: %w[draft production archived] }

    # Only ONE production version per prompt name
    validate :only_one_production_per_name, if: :production?

    # Validate config format
    validate :validate_config_format

    # Prevent editing immutable prompts
    before_update :ensure_editable!, if: :content_changed?

    # Ensure config is always a Hash, not a String
    before_validation :normalize_config

    # ============================================
    # SCOPES
    # ============================================
    scope :by_name, ->(name) { where(name: name) }
    scope :latest_version, -> { order(version: :desc).limit(1) }

    # ============================================
    # AASM STATE MACHINE
    # ============================================
    aasm column: :state, after_commit: true do
      state :draft, initial: true
      state :production
      state :archived

      event :promote do
        transitions from: :draft, to: :production, after: :demote_other_production_versions
      end

      event :demote do
        transitions from: :production, to: :archived
      end

      event :restore do
        transitions from: :archived, to: :production, after: :demote_other_production_versions
      end

      # Invalidate cache after any state transition
      after_all_transitions :invalidate_cache_after_transition
    end

    # ============================================
    # CALLBACKS
    # ============================================

    # Invalidate cache after updates or deletion
    after_save :invalidate_cache_if_changed
    after_destroy :invalidate_cache_on_destroy

    # ============================================
    # CLASS METHODS
    # ============================================

    # Fetches a prompt by name and either an explicit version or a state,
    # going through Rails.cache.
    #
    # Only records found in the database are cached (+skip_nil+). The caller's
    # +fallback+ is applied after the cache lookup, so one caller's fallback is
    # never stored under the prompt's key and handed to a later caller.
    #
    # @param name [String]
    # @param version [Integer, nil] exact version; takes precedence over +state+
    # @param state [Symbol, String, nil] nil selects Observ.config.prompt_default_state
    # @param fallback [Object, nil] returned when no record matches
    # @return [Observ::Prompt, Object] the record, or +fallback+
    # @raise [Observ::PromptNotFoundError] when nothing matches and no fallback is given
    def self.fetch(name:, version: nil, state: :production, fallback: nil)
      state ||= Observ.config.prompt_default_state
      cache_key = cache_key_for(name: name, version: version, state: state)
      cache_ttl = Observ.config.prompt_cache_ttl

      found = Rails.cache.fetch(cache_key, expires_in: cache_ttl, skip_nil: true) do
        find_version_or_state(name: name, version: version, state: state)
      end

      found || fallback || raise(PromptNotFoundError, "Prompt '#{name}' not found")
    end

    # Uncached lookup with the same resolution rules as .fetch.
    #
    # @return [Observ::Prompt, Object] the record, or +fallback+
    # @raise [Observ::PromptNotFoundError] when nothing matches and no fallback is given
    def self.fetch_from_database(name:, version:, state:, fallback:)
      find_version_or_state(name: name, version: version, state: state) ||
        fallback ||
        raise(PromptNotFoundError, "Prompt '#{name}' not found")
    end

    # Finds the record for +name+ with the given version, or (when no version
    # is given) the first record in the given state. The state is used as a
    # column filter rather than dispatched as a method name, so an unexpected
    # value simply matches nothing.
    #
    # @return [Observ::Prompt, nil]
    def self.find_version_or_state(name:, version:, state:)
      versions = where(name: name)
      return versions.find_by(version: version) if version.present?

      versions.where(state: state.to_s).first
    end
    private_class_method :find_version_or_state

    # @return [String] cache key; the version wins over the state when present
    def self.cache_key_for(name:, version:, state:)
      "observ:prompt:#{name}:#{version || state}"
    end

    # Deletes the per-state cache entries for +name+.
    # NOTE(review): entries keyed by an explicit version are not removed here.
    def self.clear_cache(name:)
      %i[draft production archived].each do |state|
        Rails.cache.delete(cache_key_for(name: name, version: nil, state: state))
      end
    end

    # Creates the next version (highest existing version + 1) as a draft and
    # optionally promotes it.
    # NOTE(review): the version number is read and then inserted without a
    # lock; confirm a unique index on (name, version) guards concurrent calls.
    #
    # @return [Observ::Prompt] the newly created record
    # @raise [ActiveRecord::RecordInvalid] when the record is invalid
    def self.create_version(name:, prompt:, config: {}, commit_message: nil, created_by: nil, promote_to_production: false)
      latest_version = where(name: name).maximum(:version) || 0
      new_version = latest_version + 1

      new_prompt = create!(
        name: name,
        prompt: prompt,
        version: new_version,
        config: config,
        commit_message: commit_message,
        created_by: created_by,
        state: :draft
      )

      new_prompt.promote! if promote_to_production
      new_prompt
    end

    # ============================================
    # INSTANCE METHODS
    # ============================================

    # Renders the template with Mustache.
    # Supports: variables {{name}}, loops {{#items}}...{{/items}},
    # conditionals {{#flag}}...{{/flag}}, inverted sections {{^items}}...{{/items}}
    #
    # @param variables [Hash] values made available to the template
    # @return [String]
    def compile(variables = {})
      Mustache.render(prompt, variables)
    end

    # Renders the template after checking that every top-level variable has a
    # value. A dotted name such as "user.name" is satisfied by a "user" key.
    # Variables inside sections are not checked here.
    #
    # @param variables [Hash] symbol or string keys
    # @return [String]
    # @raise [Observ::VariableSubstitutionError] listing the missing names
    def compile_with_validation(variables = {})
      provided_keys = variables.keys.map(&:to_s)

      missing_vars = required_variables.reject do |var|
        root_key = var.split(".").first
        provided_keys.include?(var) || provided_keys.include?(root_key)
      end

      if missing_vars.any?
        raise VariableSubstitutionError, "Missing variables: #{missing_vars.join(', ')}"
      end

      compile(variables)
    end

    # Names of the variables used outside of any section, in order of first
    # appearance and without duplicates.
    #
    # @return [Array<String>]
    def required_variables
      strip_sections(prompt).scan(VARIABLE_TAG_PATTERN).flatten.uniq
    end

    # Immutability checks
    def editable?
      draft?
    end

    def immutable?
      production? || archived?
    end

    def can_delete?
      draft? || archived?
    end

    # Copies name, prompt and config into a new draft version.
    #
    # @return [Observ::Prompt] the new draft
    def clone_to_draft
      self.class.create_version(
        name: name,
        prompt: prompt,
        config: config,
        commit_message: "Cloned from v#{version} (#{state})",
        created_by: nil
      )
    end

    # Version navigation

    # @return [Observ::Prompt, nil] closest lower version with the same name
    def previous_version
      self.class.where(name: name).where("version < ?", version).order(version: :desc).first
    end

    # @return [Observ::Prompt, nil] closest higher version with the same name
    def next_version
      self.class.where(name: name).where("version > ?", version).order(version: :asc).first
    end

    # @return [Observ::Prompt, nil] highest version with the same name
    def latest_version
      self.class.where(name: name).order(version: :desc).first
    end

    # Export

    # @return [Hash] attributes without id and timestamps
    def to_json_export
      as_json(except: [ :id, :created_at, :updated_at ])
    end

    # @return [String] YAML rendering of #to_json_export
    def to_yaml_export
      to_json_export.to_yaml
    end

    private

    # ============================================
    # VALIDATIONS
    # ============================================

    def only_one_production_per_name
      existing_production = self.class.where(name: name, state: :production).where.not(id: id).exists?
      if existing_production
        errors.add(:state, "Only one production version allowed per prompt name")
      end
    end

    # Copies every error reported by PromptConfigValidator onto :config.
    def validate_config_format
      return if config.blank?

      validator = Observ::PromptConfigValidator.new(config)
      unless validator.valid?
        validator.errors.each do |error|
          errors.add(:config, error)
        end
      end
    end

    # Aborts the update with RecordInvalid when content of a production or
    # archived prompt is being changed.
    def ensure_editable!
      if immutable?
        errors.add(:base, "Cannot edit #{state} prompt. Clone to draft first.")
        raise ActiveRecord::RecordInvalid, self
      end
    end

    def content_changed?
      prompt_changed? || config_changed?
    end

    # ============================================
    # CALLBACKS
    # ============================================

    # Archives every other production version of this name. Uses update_all,
    # so validations and callbacks of those rows are skipped.
    def demote_other_production_versions
      self.class.where(name: name, state: :production).where.not(id: id).update_all(state: :archived)
    end

    def invalidate_cache_after_transition
      Observ::PromptManager.invalidate_cache(name: name)
      Rails.logger.info("Cache invalidated after state transition for #{name} v#{version}")
    end

    def invalidate_cache_if_changed
      return unless saved_change_to_prompt? || saved_change_to_config? || saved_change_to_state?

      Observ::PromptManager.invalidate_cache(name: name)
    end

    def invalidate_cache_on_destroy
      Observ::PromptManager.invalidate_cache(name: name)
    end

    def clear_prompt_cache
      self.class.clear_cache(name: name)
    end

    # Coerces config to a Hash: JSON strings are parsed (unparsable ones
    # become {}), and any other non-Hash value becomes {}. nil is left alone.
    def normalize_config
      return if config.nil?

      # If config is a String, parse it to a Hash
      if config.is_a?(String)
        self.config = begin
          JSON.parse(config)
        rescue JSON::ParserError
          {} # Default to empty hash if parsing fails
        end
      end

      # Ensure it's a Hash (could be other types in edge cases)
      self.config = {} unless config.is_a?(Hash)
    end

    # Removes every {{#section}}...{{/section}} and {{^section}}...{{/section}}
    # span so that only top-level tags remain. The substitution is repeated
    # until the text stops changing, which also clears nested sections.
    def strip_sections(template)
      result = template.dup

      loop do
        previous = result
        result = result.gsub(SECTION_PATTERN, "")
        break if result == previous
      end

      result
    end
  end

  # Custom exceptions
  class PromptNotFoundError < StandardError; end
  class VariableSubstitutionError < StandardError; end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # A named numeric evaluation value attached to a dataset run item and a
  # trace (and optionally an observation). A value of 0.5 or more counts as
  # passing.
  class Score < ApplicationRecord
    self.table_name = "observ_scores"

    belongs_to :dataset_run_item, class_name: "Observ::DatasetRunItem", inverse_of: :scores
    belongs_to :trace, class_name: "Observ::Trace"
    belongs_to :observation, class_name: "Observ::Observation", optional: true

    enum :data_type, { numeric: 0, boolean: 1, categorical: 2 }
    enum :source, { programmatic: 0, manual: 1, llm_judge: 2 }

    validates :name, presence: true
    validates :value, presence: true, numericality: true
    validates :dataset_run_item_id, uniqueness: { scope: [ :name, :source ], message: "already has a score with this name and source" }

    # Convenience readers forwarded to the owning run item
    delegate :dataset_run, :dataset_item, to: :dataset_run_item

    # Pass/fail helpers

    # @return [Boolean] true when value is at least 0.5
    def passed?
      value >= 0.5
    end

    # @return [Boolean] the negation of #passed?
    def failed?
      !passed?
    end

    # Display helpers

    # Human-readable value: "Pass"/"Fail" for boolean scores, the string value
    # (or the number when that is blank) for categorical scores, and the value
    # rounded to two decimals otherwise.
    #
    # @return [String]
    def display_value
      if boolean?
        passed? ? "Pass" : "Fail"
      elsif categorical?
        string_value.presence || value.to_s
      else
        value.round(2).to_s
      end
    end

    # CSS modifier class for the score badge. Boolean scores are green or red
    # by pass/fail; other scores are green from 0.7, amber from 0.4, red below.
    #
    # @return [String]
    def badge_class
      return(passed? ? "observ-badge--success" : "observ-badge--danger") if boolean?
      return "observ-badge--success" if value >= 0.7
      return "observ-badge--warning" if value >= 0.4

      "observ-badge--danger"
    end
  end
end
|