rubyllm-observ 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +778 -0
- data/Rakefile +49 -0
- data/app/assets/javascripts/observ/application.js +12 -0
- data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
- data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
- data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
- data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
- data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
- data/app/assets/javascripts/observ/controllers/index.js +52 -0
- data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
- data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
- data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
- data/app/assets/stylesheets/observ/_annotations.scss +127 -0
- data/app/assets/stylesheets/observ/_card.scss +52 -0
- data/app/assets/stylesheets/observ/_chat.scss +156 -0
- data/app/assets/stylesheets/observ/_components.scss +460 -0
- data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
- data/app/assets/stylesheets/observ/_datasets.scss +697 -0
- data/app/assets/stylesheets/observ/_drawer.scss +273 -0
- data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
- data/app/assets/stylesheets/observ/_layout.scss +256 -0
- data/app/assets/stylesheets/observ/_metrics.scss +99 -0
- data/app/assets/stylesheets/observ/_observations.scss +160 -0
- data/app/assets/stylesheets/observ/_pagination.scss +143 -0
- data/app/assets/stylesheets/observ/_prompts.scss +365 -0
- data/app/assets/stylesheets/observ/_table.scss +53 -0
- data/app/assets/stylesheets/observ/_variables.scss +53 -0
- data/app/assets/stylesheets/observ/application.scss +15 -0
- data/app/controllers/observ/annotations_controller.rb +144 -0
- data/app/controllers/observ/application_controller.rb +8 -0
- data/app/controllers/observ/chats_controller.rb +58 -0
- data/app/controllers/observ/dashboard_controller.rb +159 -0
- data/app/controllers/observ/dataset_items_controller.rb +85 -0
- data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
- data/app/controllers/observ/dataset_runs_controller.rb +110 -0
- data/app/controllers/observ/datasets_controller.rb +74 -0
- data/app/controllers/observ/messages_controller.rb +26 -0
- data/app/controllers/observ/observations_controller.rb +59 -0
- data/app/controllers/observ/prompt_versions_controller.rb +148 -0
- data/app/controllers/observ/prompts_controller.rb +205 -0
- data/app/controllers/observ/sessions_controller.rb +45 -0
- data/app/controllers/observ/traces_controller.rb +86 -0
- data/app/forms/observ/prompt_form.rb +96 -0
- data/app/helpers/observ/application_helper.rb +9 -0
- data/app/helpers/observ/chats_helper.rb +47 -0
- data/app/helpers/observ/dashboard_helper.rb +154 -0
- data/app/helpers/observ/datasets_helper.rb +62 -0
- data/app/helpers/observ/pagination_helper.rb +38 -0
- data/app/jobs/observ/application_job.rb +4 -0
- data/app/jobs/observ/dataset_runner_job.rb +49 -0
- data/app/mailers/observ/application_mailer.rb +6 -0
- data/app/models/concerns/observ/agent_phaseable.rb +124 -0
- data/app/models/concerns/observ/agent_selectable.rb +50 -0
- data/app/models/concerns/observ/chat_enhancements.rb +109 -0
- data/app/models/concerns/observ/message_enhancements.rb +31 -0
- data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
- data/app/models/concerns/observ/prompt_management.rb +320 -0
- data/app/models/concerns/observ/trace_association.rb +9 -0
- data/app/models/observ/annotation.rb +23 -0
- data/app/models/observ/application_record.rb +5 -0
- data/app/models/observ/dataset.rb +51 -0
- data/app/models/observ/dataset_item.rb +41 -0
- data/app/models/observ/dataset_run.rb +104 -0
- data/app/models/observ/dataset_run_item.rb +111 -0
- data/app/models/observ/generation.rb +56 -0
- data/app/models/observ/null_prompt.rb +59 -0
- data/app/models/observ/observation.rb +38 -0
- data/app/models/observ/prompt.rb +315 -0
- data/app/models/observ/score.rb +51 -0
- data/app/models/observ/session.rb +131 -0
- data/app/models/observ/span.rb +13 -0
- data/app/models/observ/trace.rb +135 -0
- data/app/presenters/observ/agent_select_presenter.rb +59 -0
- data/app/services/observ/agent_executor_service.rb +174 -0
- data/app/services/observ/agent_provider.rb +60 -0
- data/app/services/observ/agent_selection_service.rb +53 -0
- data/app/services/observ/chat_instrumenter.rb +523 -0
- data/app/services/observ/dataset_runner_service.rb +153 -0
- data/app/services/observ/evaluator_runner_service.rb +58 -0
- data/app/services/observ/evaluators/base_evaluator.rb +51 -0
- data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
- data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
- data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
- data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
- data/app/services/observ/prompt_manager/caching.rb +167 -0
- data/app/services/observ/prompt_manager/comparison.rb +49 -0
- data/app/services/observ/prompt_manager/version_management.rb +96 -0
- data/app/services/observ/prompt_manager.rb +40 -0
- data/app/services/observ/trace_text_formatter.rb +349 -0
- data/app/validators/observ/prompt_config_validator.rb +187 -0
- data/app/views/kaminari/_first_page.html.erb +11 -0
- data/app/views/kaminari/_gap.html.erb +8 -0
- data/app/views/kaminari/_last_page.html.erb +11 -0
- data/app/views/kaminari/_next_page.html.erb +11 -0
- data/app/views/kaminari/_page.html.erb +12 -0
- data/app/views/kaminari/_paginator.html.erb +25 -0
- data/app/views/kaminari/_prev_page.html.erb +11 -0
- data/app/views/kaminari/observ/_first_page.html.erb +11 -0
- data/app/views/kaminari/observ/_gap.html.erb +8 -0
- data/app/views/kaminari/observ/_last_page.html.erb +11 -0
- data/app/views/kaminari/observ/_next_page.html.erb +11 -0
- data/app/views/kaminari/observ/_page.html.erb +12 -0
- data/app/views/kaminari/observ/_paginator.html.erb +25 -0
- data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
- data/app/views/layouts/observ/application.html.erb +88 -0
- data/app/views/observ/annotations/_annotation.html.erb +13 -0
- data/app/views/observ/annotations/_form.html.erb +28 -0
- data/app/views/observ/annotations/index.html.erb +28 -0
- data/app/views/observ/annotations/sessions_index.html.erb +48 -0
- data/app/views/observ/annotations/traces_index.html.erb +48 -0
- data/app/views/observ/chats/_form.html.erb +45 -0
- data/app/views/observ/chats/index.html.erb +67 -0
- data/app/views/observ/chats/new.html.erb +17 -0
- data/app/views/observ/chats/show.html.erb +34 -0
- data/app/views/observ/dashboard/index.html.erb +236 -0
- data/app/views/observ/dataset_items/_form.html.erb +49 -0
- data/app/views/observ/dataset_items/edit.html.erb +18 -0
- data/app/views/observ/dataset_items/index.html.erb +95 -0
- data/app/views/observ/dataset_items/new.html.erb +18 -0
- data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
- data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
- data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
- data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
- data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
- data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
- data/app/views/observ/dataset_runs/index.html.erb +108 -0
- data/app/views/observ/dataset_runs/new.html.erb +57 -0
- data/app/views/observ/dataset_runs/review.html.erb +155 -0
- data/app/views/observ/dataset_runs/show.html.erb +166 -0
- data/app/views/observ/datasets/_form.html.erb +62 -0
- data/app/views/observ/datasets/_items_tab.html.erb +66 -0
- data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
- data/app/views/observ/datasets/edit.html.erb +32 -0
- data/app/views/observ/datasets/index.html.erb +105 -0
- data/app/views/observ/datasets/new.html.erb +18 -0
- data/app/views/observ/datasets/show.html.erb +67 -0
- data/app/views/observ/messages/_content.html.erb +1 -0
- data/app/views/observ/messages/_form.html.erb +33 -0
- data/app/views/observ/messages/_message.html.erb +14 -0
- data/app/views/observ/messages/_tool_calls.html.erb +10 -0
- data/app/views/observ/messages/create.turbo_stream.erb +9 -0
- data/app/views/observ/observations/index.html.erb +97 -0
- data/app/views/observ/observations/show_generation.html.erb +195 -0
- data/app/views/observ/observations/show_span.html.erb +93 -0
- data/app/views/observ/prompts/_diff_content.html.erb +16 -0
- data/app/views/observ/prompts/_form.html.erb +111 -0
- data/app/views/observ/prompts/_new_form.html.erb +102 -0
- data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
- data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
- data/app/views/observ/prompts/_version_actions.html.erb +40 -0
- data/app/views/observ/prompts/compare.html.erb +155 -0
- data/app/views/observ/prompts/edit.html.erb +17 -0
- data/app/views/observ/prompts/index.html.erb +108 -0
- data/app/views/observ/prompts/new.html.erb +17 -0
- data/app/views/observ/prompts/show.html.erb +138 -0
- data/app/views/observ/prompts/versions.html.erb +87 -0
- data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
- data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
- data/app/views/observ/sessions/index.html.erb +91 -0
- data/app/views/observ/sessions/show.html.erb +251 -0
- data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
- data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
- data/app/views/observ/traces/index.html.erb +87 -0
- data/app/views/observ/traces/show.html.erb +285 -0
- data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
- data/app/views/shared/_drawer.html.erb +26 -0
- data/config/routes.rb +80 -0
- data/db/migrate/001_create_observ_sessions.rb +21 -0
- data/db/migrate/002_create_observ_traces.rb +25 -0
- data/db/migrate/003_create_observ_observations.rb +42 -0
- data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
- data/db/migrate/005_create_observ_prompts.rb +21 -0
- data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
- data/db/migrate/007_create_observ_annotations.rb +12 -0
- data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
- data/db/migrate/010_create_observ_datasets.rb +15 -0
- data/db/migrate/011_create_observ_dataset_items.rb +17 -0
- data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
- data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
- data/db/migrate/014_create_observ_scores.rb +26 -0
- data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
- data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
- data/lib/generators/observ/install/USAGE +27 -0
- data/lib/generators/observ/install/install_generator.rb +270 -0
- data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
- data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
- data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
- data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
- data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
- data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
- data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
- data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
- data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
- data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
- data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
- data/lib/observ/asset_installer.rb +130 -0
- data/lib/observ/asset_syncer.rb +104 -0
- data/lib/observ/configuration.rb +108 -0
- data/lib/observ/engine.rb +50 -0
- data/lib/observ/index_file_generator.rb +142 -0
- data/lib/observ/instrumenter/ruby_llm.rb +6 -0
- data/lib/observ/version.rb +3 -0
- data/lib/observ.rb +29 -0
- data/lib/tasks/observ_tasks.rake +75 -0
- metadata +453 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
module Observ
  # Concern that links a chat-like ActiveRecord model to an Observ::Session and
  # attaches an Observ::ChatInstrumenter to it.
  #
  # The including model is used through the following methods/attributes:
  # `id`, `observability_session_id`, `agent_class_name`, `ask`, `complete`
  # and `update_column`. `agent_class` and `prompt_version` are optional and
  # only consulted when the model responds to them.
  module ObservabilityInstrumentation
    extend ActiveSupport::Concern

    included do
      # Holds the Observ::ChatInstrumenter once the chat has been instrumented.
      attr_accessor :instrumenter

      # The session is looked up by its public `session_id`, not by its primary key.
      belongs_to :observ_session,
                 class_name: "Observ::Session",
                 foreign_key: :observability_session_id,
                 primary_key: :session_id,
                 optional: true

      after_create :initialize_observability_session
      after_find :ensure_instrumented_if_needed
    end

    # Makes sure the chat is instrumented, then delegates to `ask`.
    #
    # @param message [Object] forwarded to `ask` unchanged
    # @param options [Hash] keyword options forwarded to `ask` unchanged
    # @return [Object] whatever `ask` returns
    def ask_with_observability(message, **options)
      ensure_instrumented!

      ask(message, **options)
    end

    # Makes sure the chat is instrumented, then delegates to `complete`.
    #
    # @yield forwarded to `complete`
    # @return [Object] whatever `complete` returns
    def complete_with_observability(&block)
      ensure_instrumented!

      complete(&block)
    end

    # Pushes additional context into the session metadata and into the live
    # instrumenter. Does nothing unless both a session and an instrumenter exist.
    #
    # @param new_context [Hash] entries to merge
    def update_observability_context(new_context)
      return if !observ_session || !@instrumenter

      observ_session.update_metadata(new_context)

      # The instrumenter's context is read through its @context instance
      # variable and mutated in place.
      instrumenter_context = @instrumenter.instance_variable_get(:@context)
      instrumenter_context.merge!(new_context)
    end

    # Finalizes the associated session (if any) and logs its session id.
    def finalize_observability_session
      return unless observ_session

      observ_session.finalize
      Rails.logger.info "[Observability] Session finalized: #{observ_session.session_id}"
    end

    private

    # Hook: metadata stored on the Observ::Session created for this chat.
    # Override in the model (or another concern) to add custom metadata.
    #
    # @return [Hash] metadata hash
    def observability_metadata
      { agent_type: agent_class_name || "standard", chat_id: id }
    end

    # Hook: context handed to the Observ::ChatInstrumenter.
    # Override in the model (or another concern) to add custom context.
    #
    # @return [Hash] context hash
    def observability_context
      {
        agent_type: agent_class_name || "standard",
        chat_id: id
      }.tap do |ctx|
        # agent_class is passed along when the model exposes it.
        ctx[:agent_class] = agent_class if respond_to?(:agent_class)

        # A non-blank prompt_version is passed along as an override.
        if respond_to?(:prompt_version) && prompt_version.present?
          ctx[:prompt_version_override] = prompt_version
        end
      end
    end

    # after_create callback: creates the Observ::Session, stores its session_id
    # on the chat and optionally instruments the chat right away.
    # Any StandardError is logged and swallowed.
    def initialize_observability_session
      return unless Rails.configuration.observability.enabled

      created_session = Observ::Session.create!(
        user_id: "chat_#{id}",
        metadata: observability_metadata
      )
      update_column(:observability_session_id, created_session.session_id)

      if Rails.configuration.observability.auto_instrument_chats
        instrument_rubyllm_chat
      end
    rescue StandardError => e
      Rails.logger.error "[Observability] Failed to initialize session: #{e.message}"
    end

    # Builds the instrumenter and calls `instrument!` on it. No-op when there
    # is no session or when an instrumenter is already present.
    # Any StandardError is logged and swallowed.
    def instrument_rubyllm_chat
      return if !observ_session || @instrumenter

      # Assigned before instrument! is called, so the instrumenter stays set
      # even if instrument! raises.
      @instrumenter = Observ::ChatInstrumenter.new(observ_session, self, context: observability_context)
      @instrumenter.instrument!
    rescue StandardError => e
      Rails.logger.error "[Observability] Failed to instrument chat: #{e.message}"
    end

    # Instruments the chat unless that already happened, re-resolving the
    # session first when a session id is stored but the association is empty.
    def ensure_instrumented!
      return if @instrumenter

      session_unresolved = observability_session_id && !observ_session
      reload_observ_session if session_unresolved

      instrument_rubyllm_chat if observ_session
    end

    # Looks the session up by the stored session id and assigns the association.
    def reload_observ_session
      self.observ_session = Observ::Session.find_by(session_id: observability_session_id)
    end

    # after_find callback: instruments loaded chats that already have a session
    # id, provided observability is enabled.
    # Any StandardError is logged and swallowed.
    def ensure_instrumented_if_needed
      return unless Rails.configuration.observability.enabled
      return if @instrumenter || !observability_session_id

      ensure_instrumented!
    rescue StandardError => e
      Rails.logger.error "[Observability] Failed to auto-instrument on find: #{e.message}"
    end
  end
end
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # Concern for agents that want to use the prompt management system.
  # Provides functionality to fetch prompts through Observ::PromptManager with
  # fallback support, variable interpolation, and model configuration taken
  # from the prompt's config.
  #
  # Usage:
  #   class MyAgent < BaseAgent
  #     include Observ::PromptManagement
  #
  #     FALLBACK_PROMPT = "You are a helpful assistant."
  #
  #     use_prompt_management(
  #       prompt_name: "my-agent-system-prompt",
  #       fallback: FALLBACK_PROMPT
  #     )
  #
  #     def self.default_model
  #       "gpt-4.1-nano"
  #     end
  #   end
  module PromptManagement
    extend ActiveSupport::Concern

    # Config keys that are forwarded as LLM parameters.
    LLM_PARAMETER_KEYS = %w[
      temperature
      max_tokens
      top_p
      frequency_penalty
      presence_penalty
      stop
      response_format
      seed
    ].freeze

    # Subset of LLM_PARAMETER_KEYS whose values are numbers. Only these are
    # coerced from numeric strings; "stop" and "response_format" are left
    # untouched so that a stop sequence such as "1" stays a String.
    NUMERIC_LLM_PARAMETER_KEYS = %w[
      temperature
      max_tokens
      top_p
      frequency_penalty
      presence_penalty
      seed
    ].freeze

    private_constant :LLM_PARAMETER_KEYS, :NUMERIC_LLM_PARAMETER_KEYS

    included do
      class_attribute :prompt_config, default: {}
      # Class-level slot for a prompt template object. It is not assigned
      # anywhere in this concern; the template fetched by fetch_prompt is kept
      # in @_prompt_template instead (see current_prompt_template).
      class_attribute :cached_prompt_template, default: nil
      # Per-instance version override (not shared across the class)
      attr_accessor :prompt_version_override
    end

    class_methods do
      # Enable/disable prompt management per agent.
      #
      # @param enabled [Boolean] false switches prompt management off for this agent
      # @param prompt_name [String, nil] defaults to default_prompt_name
      # @param fallback [String, nil] defaults to default_fallback_prompt
      # @return [Hash] the stored prompt_config
      def use_prompt_management(enabled: true, prompt_name: nil, fallback: nil)
        self.prompt_config = {
          enabled: enabled,
          prompt_name: prompt_name || default_prompt_name,
          fallback: fallback || default_fallback_prompt
        }
      end

      # Default prompt name based on agent class name.
      # Example: ResearchAgent => 'research-agent-system-prompt'
      #
      # @return [String]
      def default_prompt_name
        "#{name.underscore.tr("_/", "--")}-system-prompt"
      end

      # Override in subclasses if needed.
      #
      # @return [String]
      def default_fallback_prompt
        "You are a helpful AI assistant."
      end

      # Check if prompt management is enabled for this agent. It is on unless
      # the global Observ config disables it or prompt_config[:enabled] is
      # explicitly false.
      def prompt_management_enabled?
        Observ.config.prompt_management_enabled &&
          prompt_config[:enabled] != false
      end

      # Fetch the prompt text, falling back to the configured fallback.
      #
      # A version stored in Thread.current[:observ_prompt_version_override]
      # takes precedence over the `version` argument. (Thread#[] storage is
      # fiber-local in Ruby.) Without any version the :production state is
      # requested.
      #
      # @param variables [Hash] when non-empty, the template is compiled with them
      # @param version [Object, nil] explicit prompt version to fetch
      # @return [String] compiled/plain prompt text, or the fallback when prompt
      #   management is disabled or the fetch raises
      def fetch_prompt(variables: {}, version: nil)
        # Ensure defaults if prompt_config was never initialized
        prompt_name, fallback = prompt_lookup_settings

        return fallback unless prompt_management_enabled?

        # Monotonic clock: the duration must not be affected by wall-clock
        # adjustments.
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

        begin
          requested_version = effective_prompt_version(version)
          prompt_template = resolve_prompt_template(prompt_name, fallback, requested_version)

          # Keep the template for metadata access (see prompt_metadata)
          @_prompt_template = prompt_template

          elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
          log_prompt_fetch(prompt_template, prompt_name, requested_version, (elapsed * 1000).round(2))

          # Compile with variables only when there is something to interpolate
          variables.any? ? prompt_template.compile(variables) : prompt_template.prompt
        rescue => e
          # Array() guards against exceptions that carry no backtrace.
          Rails.logger.error(
            "Failed to fetch prompt for #{name}: #{e.message}\n" \
            "#{Array(e.backtrace).first(5).join("\n")}"
          )
          # Clear cached template on error
          @_prompt_template = nil
          fallback
        end
      end

      # Override this in subclasses to provide dynamic variables.
      #
      # @return [Hash] variables passed to fetch_prompt by system_prompt
      def prompt_variables
        {
          current_date: Time.current.strftime("%B %d, %Y"),
          current_time: Time.current.strftime("%I:%M %p %Z")
        }
      end

      # Override system_prompt to use prompt management.
      #
      # The result is memoized in a class-level instance variable, so the
      # prompt (including the interpolated prompt_variables) is computed once
      # and reused until reset_prompt_cache! is called.
      #
      # @return [String]
      def system_prompt
        @_system_prompt ||= fetch_prompt(variables: prompt_variables)
      end

      # Get the template stored by the last successful fetch_prompt call
      # (nil before the first fetch, after a failed fetch, or after a reset).
      def current_prompt_template
        @_prompt_template
      end

      # Get prompt metadata for observability.
      #
      # @return [Hash] Hash with :prompt_name and :prompt_version keys; keys
      #   whose value is nil are omitted, and the hash is empty when no
      #   template has been fetched
      def prompt_metadata
        template = current_prompt_template
        return {} unless template

        {
          prompt_name: template.respond_to?(:name) ? template.name : nil,
          prompt_version: template.respond_to?(:version) ? template.version : nil
        }.compact
      end

      # Clear cached prompt (useful for tests or when prompt is updated)
      def reset_prompt_cache!
        @_system_prompt = nil
        @_prompt_template = nil
      end

      # Override model to check prompt metadata first.
      # Priority:
      # 1. Prompt metadata (config['model']) - if prompt management is enabled
      # 2. Agent's default_model - fallback
      def model
        if prompt_management_enabled?
          model_from_prompt = fetch_model_from_prompt
          return model_from_prompt if model_from_prompt.present?
        end

        # Fallback to agent's default
        default_model
      end

      # Fetch model from prompt metadata.
      #
      # @param version [Object, nil] explicit prompt version; the thread-local
      #   override takes precedence, as in fetch_prompt
      # @return [String, nil] The model from prompt config, or nil (also nil
      #   when prompt management is disabled or the fetch raises)
      def fetch_model_from_prompt(version: nil)
        return nil unless prompt_management_enabled?

        prompt_name, fallback = prompt_lookup_settings

        begin
          prompt_template = resolve_prompt_template(prompt_name, fallback, effective_prompt_version(version))

          # Only templates exposing a Hash config can carry a model
          if prompt_template.respond_to?(:config) && prompt_template.config.is_a?(Hash)
            prompt_template.config["model"]
          end
        rescue => e
          Rails.logger.debug(
            "Could not fetch model from prompt #{prompt_name}: #{e.message}"
          )
          nil
        end
      end

      # Override model_parameters to check prompt metadata first.
      # Priority:
      # 1. Prompt metadata (config['temperature'], config['max_tokens'], etc.)
      # 2. Agent's default_model_parameters - fallback
      #
      # @return [Hash] The model parameters to use
      def model_parameters
        if prompt_management_enabled?
          params_from_prompt = fetch_model_parameters_from_prompt
          return params_from_prompt if params_from_prompt.present?
        end

        # Fallback to agent's defaults
        default_model_parameters
      end

      # Fetch model parameters from prompt metadata.
      #
      # @param version [Object, nil] explicit prompt version; the thread-local
      #   override takes precedence, as in fetch_prompt
      # @return [Hash] The model parameters from prompt config ({} when prompt
      #   management is disabled, the template has no Hash config, or the
      #   fetch raises)
      def fetch_model_parameters_from_prompt(version: nil)
        return {} unless prompt_management_enabled?

        prompt_name, fallback = prompt_lookup_settings

        begin
          prompt_template = resolve_prompt_template(prompt_name, fallback, effective_prompt_version(version))

          # Only templates exposing a Hash config can carry parameters
          if prompt_template.respond_to?(:config) && prompt_template.config.is_a?(Hash)
            extract_llm_parameters(prompt_template.config)
          else
            {}
          end
        rescue => e
          Rails.logger.debug(
            "Could not fetch parameters from prompt #{prompt_name}: #{e.message}"
          )
          {}
        end
      end

      private

      # Resolve the prompt name and fallback text, applying defaults when
      # prompt_config is empty or lacks a key.
      #
      # @return [Array(String, String)] [prompt_name, fallback]
      def prompt_lookup_settings
        config = prompt_config.presence || {}

        [
          config[:prompt_name] || default_prompt_name,
          config[:fallback] || default_fallback_prompt
        ]
      end

      # Pick the version to fetch: the thread-local override wins over the
      # explicitly passed version.
      def effective_prompt_version(version)
        Thread.current[:observ_prompt_version_override] || version
      end

      # Fetch the template by version when one is given, otherwise by the
      # :production state.
      def resolve_prompt_template(prompt_name, fallback, version)
        if version.present?
          Observ::PromptManager.fetch(name: prompt_name, version: version, fallback: fallback)
        else
          Observ::PromptManager.fetch(name: prompt_name, state: :production, fallback: fallback)
        end
      end

      # Log the outcome of a fetch: a template without a version is reported
      # as the fallback being used.
      def log_prompt_fetch(prompt_template, prompt_name, requested_version, duration_ms)
        if prompt_template.version
          version_info =
            if requested_version.present?
              "(version: #{prompt_template.version})"
            else
              "(production, version: #{prompt_template.version})"
            end

          Rails.logger.info(
            "Prompt fetched for #{name}: #{prompt_name} " \
            "#{version_info} " \
            "(duration: #{duration_ms}ms)"
          )
        else
          Rails.logger.info(
            "Using fallback prompt for #{name}: prompt '#{prompt_name}' not found " \
            "(duration: #{duration_ms}ms)"
          )
        end
      end

      # Extract LLM parameters from config hash.
      #
      # Only string keys listed in LLM_PARAMETER_KEYS are considered; nil
      # values are dropped and keys are symbolized.
      #
      # @param config [Hash] The prompt config
      # @return [Hash] Extracted parameters (temperature, max_tokens, etc.)
      def extract_llm_parameters(config)
        config.slice(*LLM_PARAMETER_KEYS).compact.each_with_object({}) do |(key, value), params|
          params[key.to_sym] =
            if NUMERIC_LLM_PARAMETER_KEYS.include?(key)
              # Numeric settings may have been stored as strings
              convert_to_numeric_if_needed(value)
            else
              value
            end
        end
      end

      # Convert string numbers to proper numeric types.
      #
      # Recognizes plain decimals ("0.7", "-1.5") and integers ("42", "-3");
      # anything else is returned unchanged.
      #
      # @param value [Object] The value to convert
      # @return [Object] Converted value (or original if not a numeric string)
      def convert_to_numeric_if_needed(value)
        return value unless value.is_a?(String)

        if value.match?(/\A-?\d+\.\d+\z/)
          value.to_f
        elsif value.match?(/\A-?\d+\z/)
          value.to_i
        else
          value
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # A note attached to any record through the polymorphic `annotatable`
  # association. Requires `content`; `tags` is stored as JSON.
  class Annotation < ApplicationRecord
    belongs_to :annotatable, polymorphic: true

    validates :content, presence: true

    # Serialize tags as JSON for SQLite compatibility
    serialize :tags, coder: JSON

    # Newest annotations first.
    scope :recent, -> { order(created_at: :desc) }

    # Ensure tags is always an array
    after_initialize :ensure_tags_array

    private

    # Defaults `tags` to an empty array when it is nil.
    #
    # after_initialize also runs for records loaded from the database. When
    # such a record was loaded with a partial `select` that leaves out `tags`,
    # reading the attribute raises ActiveModel::MissingAttributeError, so the
    # default is only applied when the attribute is actually present.
    def ensure_tags_array
      return unless has_attribute?(:tags)

      self.tags ||= []
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # A named collection of dataset items that is evaluated against an agent
  # class through dataset runs. Deleting a dataset deletes its items and runs.
  class Dataset < ApplicationRecord
    self.table_name = "observ_datasets"

    has_many :items,
             class_name: "Observ::DatasetItem",
             foreign_key: :dataset_id,
             inverse_of: :dataset,
             dependent: :destroy
    has_many :runs,
             class_name: "Observ::DatasetRun",
             foreign_key: :dataset_id,
             inverse_of: :dataset,
             dependent: :destroy

    validates :name, presence: true, uniqueness: true
    validates :agent_class, presence: true
    # The constant lookup is skipped for blank values; the presence
    # validation above already reports those.
    validate :agent_class_exists, unless: -> { agent_class.blank? }

    # Resolves the stored class name to the constant it names.
    #
    # @return [Class, Module] the constant named by `agent_class`
    # @raise [NameError] when no such constant exists
    def agent
      agent_class.constantize
    end

    # Items eligible for evaluation runs (the `active` scope of the items).
    def active_items
      items.active
    end

    # --- Count helpers for the UI -------------------------------------------

    # Number of items in this dataset.
    def items_count
      items.count
    end

    # Number of active items in this dataset.
    def active_items_count
      items.active.count
    end

    # Number of runs recorded for this dataset.
    def runs_count
      runs.count
    end

    # Most recently created run, or nil when there is none.
    def last_run
      newest_first = runs.order(created_at: :desc)
      newest_first.first
    end

    private

    # Validation: adds an error on `agent_class` when the name cannot be
    # resolved to a constant.
    def agent_class_exists
      begin
        agent_class.constantize
      rescue NameError
        errors.add(:agent_class, "must be a valid agent class")
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # A single example in a dataset: a required `input`, an optional
  # `expected_output`, and optionally the trace it was created from.
  class DatasetItem < ApplicationRecord
    self.table_name = "observ_dataset_items"

    belongs_to :dataset, class_name: "Observ::Dataset", inverse_of: :items
    belongs_to :source_trace, class_name: "Observ::Trace", optional: true
    has_many :run_items, class_name: "Observ::DatasetRunItem",
             foreign_key: :dataset_item_id, dependent: :destroy, inverse_of: :dataset_item

    # The enum also generates the `active` and `archived` scopes (plus the
    # `active?`/`archived?` predicates), so they are not declared separately.
    enum :status, { active: 0, archived: 1 }

    validates :input, presence: true

    # Short, single-string rendering of `input` for UI display.
    #
    # @param max_length [Integer] maximum number of characters before truncation
    # @return [String, nil] the preview, or nil when `input` is blank
    def input_preview(max_length: 100)
      preview_of(input, max_length)
    end

    # Short, single-string rendering of `expected_output` for UI display.
    #
    # @param max_length [Integer] maximum number of characters before truncation
    # @return [String, nil] the preview, or nil when `expected_output` is blank
    def expected_output_preview(max_length: 100)
      preview_of(expected_output, max_length)
    end

    # Number of run items recorded for this item.
    def run_count
      run_items.count
    end

    # Most recently created run item, or nil when the item has never been run.
    def last_run_item
      run_items.order(created_at: :desc).first
    end

    private

    # Renders a value as text and truncates it to `max_length` characters,
    # appending "..." when something was cut off.
    #
    # Hashes and Arrays are rendered as JSON so that structured payloads do
    # not show up in Ruby inspect syntax; everything else uses `to_s`.
    def preview_of(value, max_length)
      return nil if value.blank?

      text =
        case value
        when Hash, Array then value.to_json
        else value.to_s
        end

      text.length > max_length ? "#{text[0...max_length]}..." : text
    end
  end
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # One execution of a dataset's items, with progress counters, aggregated
  # cost/token totals and score statistics.
  class DatasetRun < ApplicationRecord
    self.table_name = "observ_dataset_runs"

    belongs_to :dataset, class_name: "Observ::Dataset", inverse_of: :runs
    has_many :run_items, class_name: "Observ::DatasetRunItem",
                         foreign_key: :dataset_run_id, dependent: :destroy, inverse_of: :dataset_run
    has_many :items, through: :run_items, source: :dataset_item
    has_many :scores, through: :run_items

    enum :status, { pending: 0, running: 1, completed: 2, failed: 3 }

    # Run names only need to be unique within their dataset.
    validates :name, presence: true, uniqueness: { scope: :dataset_id }

    # Progress tracking

    # @return [Numeric] percentage (one decimal) of items that have either
    #   completed or failed; 0 when the run has no items
    def progress_percentage
      return 0 if total_items.zero?

      ((completed_items + failed_items).to_f / total_items * 100).round(1)
    end

    # @return [Boolean] true once the run status is completed or failed
    def finished?
      completed? || failed?
    end

    # @return [Boolean] true while the run status is pending or running
    def in_progress?
      pending? || running?
    end

    # Recomputes and persists the aggregate counters from the run items.
    #
    # An item counts as completed when it has a trace and no error, and as
    # failed when it has an error. Cost and tokens are summed over the traces
    # referenced by the run items.
    #
    # @return [true]
    # @raise [ActiveRecord::RecordInvalid] if the record fails validation
    def update_metrics!
      traced_items = run_items.where.not(trace_id: nil)
      traces = Observ::Trace.where(id: traced_items.pluck(:trace_id))

      update!(
        completed_items: traced_items.where(error: nil).count,
        failed_items: run_items.where.not(error: nil).count,
        # sum returns 0 (never nil) when there are no matching rows.
        total_cost: traces.sum(:total_cost),
        total_tokens: traces.sum(:total_tokens)
      )
    end

    # Ensures a run item exists for every active item of the dataset, then
    # stores the resulting run item count in +total_items+.
    #
    # Uses find_or_create_by!, so calling it again does not duplicate items.
    #
    # @return [true]
    def initialize_run_items!
      dataset.active_items.find_each do |item|
        run_items.find_or_create_by!(dataset_item: item)
      end
      update!(total_items: run_items.count)
    end

    # Summary helpers for UI

    # @return [Numeric] percentage (one decimal) of items completed; 0 when
    #   the run has no items
    def success_rate
      return 0 if total_items.zero?

      (completed_items.to_f / total_items * 100).round(1)
    end

    # @return [Numeric] percentage (one decimal) of items failed; 0 when the
    #   run has no items
    def failure_rate
      return 0 if total_items.zero?

      (failed_items.to_f / total_items * 100).round(1)
    end

    # @return [Integer] items that are neither completed nor failed yet
    def pending_items_count
      total_items - completed_items - failed_items
    end

    # Time between the creation of the earliest run item and the latest update
    # of any run item.
    #
    # @return [Float, nil] seconds (one decimal), or nil when the run is not
    #   finished or has no run items
    def duration_seconds
      return nil unless finished? && run_items.any?

      first_item = run_items.order(created_at: :asc).first
      last_item = run_items.order(updated_at: :desc).first
      (last_item.updated_at - first_item.created_at).round(1)
    end

    # Score aggregation

    # @param name [String] score name to aggregate
    # @return [Numeric, nil] mean score value rounded to 4 decimals, or nil
    #   when there is nothing to average
    def average_score(name)
      # average already returns nil when no rows match.
      scores.where(name: name).average(:value)&.round(4)
    end

    # @return [Hash] mean score value per score name, rounded to 4 decimals;
    #   a name whose average is nil maps to nil
    def score_summary
      scores.group(:name).average(:value).transform_values { |avg| avg&.round(4) }
    end

    # Share of scores whose value is at least 0.5.
    #
    # @param score_name [String, nil] restrict to this score name; all scores
    #   of the run when nil
    # @return [Float, nil] percentage (one decimal), or nil when no scores match
    def pass_rate(score_name = nil)
      scope = scores
      scope = scope.where(name: score_name) if score_name

      total = scope.count
      return nil if total.zero?

      # Hash/range condition instead of a raw SQL string so the column is
      # qualified with the scores table in this joined query.
      passing = scope.where(value: 0.5..).count
      (passing.to_f / total * 100).round(1)
    end

    # @return [Integer] number of distinct run items that have at least one score
    def items_with_scores_count
      run_items.joins(:scores).distinct.count
    end

    # @return [Integer] +total_items+ minus the run items that have a score
    def items_without_scores_count
      total_items - items_with_scores_count
    end
  end
end
|