rubyllm-observ 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +778 -0
- data/Rakefile +49 -0
- data/app/assets/javascripts/observ/application.js +12 -0
- data/app/assets/javascripts/observ/controllers/autoscroll_controller.js +33 -0
- data/app/assets/javascripts/observ/controllers/chat_form_controller.js +93 -0
- data/app/assets/javascripts/observ/controllers/copy_controller.js +43 -0
- data/app/assets/javascripts/observ/controllers/dashboard_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/drawer_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/expandable_controller.js +33 -0
- data/app/assets/javascripts/observ/controllers/filter_controller.js +36 -0
- data/app/assets/javascripts/observ/controllers/index.js +52 -0
- data/app/assets/javascripts/observ/controllers/json_viewer_controller.js +260 -0
- data/app/assets/javascripts/observ/controllers/message_form_controller.js +58 -0
- data/app/assets/javascripts/observ/controllers/prompt_variables_controller.js +64 -0
- data/app/assets/javascripts/observ/controllers/text_select_controller.js +14 -0
- data/app/assets/stylesheets/observ/_annotations.scss +127 -0
- data/app/assets/stylesheets/observ/_card.scss +52 -0
- data/app/assets/stylesheets/observ/_chat.scss +156 -0
- data/app/assets/stylesheets/observ/_components.scss +460 -0
- data/app/assets/stylesheets/observ/_dashboard.scss +40 -0
- data/app/assets/stylesheets/observ/_datasets.scss +697 -0
- data/app/assets/stylesheets/observ/_drawer.scss +273 -0
- data/app/assets/stylesheets/observ/_json_viewer.scss +120 -0
- data/app/assets/stylesheets/observ/_layout.scss +256 -0
- data/app/assets/stylesheets/observ/_metrics.scss +99 -0
- data/app/assets/stylesheets/observ/_observations.scss +160 -0
- data/app/assets/stylesheets/observ/_pagination.scss +143 -0
- data/app/assets/stylesheets/observ/_prompts.scss +365 -0
- data/app/assets/stylesheets/observ/_table.scss +53 -0
- data/app/assets/stylesheets/observ/_variables.scss +53 -0
- data/app/assets/stylesheets/observ/application.scss +15 -0
- data/app/controllers/observ/annotations_controller.rb +144 -0
- data/app/controllers/observ/application_controller.rb +8 -0
- data/app/controllers/observ/chats_controller.rb +58 -0
- data/app/controllers/observ/dashboard_controller.rb +159 -0
- data/app/controllers/observ/dataset_items_controller.rb +85 -0
- data/app/controllers/observ/dataset_run_items_controller.rb +84 -0
- data/app/controllers/observ/dataset_runs_controller.rb +110 -0
- data/app/controllers/observ/datasets_controller.rb +74 -0
- data/app/controllers/observ/messages_controller.rb +26 -0
- data/app/controllers/observ/observations_controller.rb +59 -0
- data/app/controllers/observ/prompt_versions_controller.rb +148 -0
- data/app/controllers/observ/prompts_controller.rb +205 -0
- data/app/controllers/observ/sessions_controller.rb +45 -0
- data/app/controllers/observ/traces_controller.rb +86 -0
- data/app/forms/observ/prompt_form.rb +96 -0
- data/app/helpers/observ/application_helper.rb +9 -0
- data/app/helpers/observ/chats_helper.rb +47 -0
- data/app/helpers/observ/dashboard_helper.rb +154 -0
- data/app/helpers/observ/datasets_helper.rb +62 -0
- data/app/helpers/observ/pagination_helper.rb +38 -0
- data/app/jobs/observ/application_job.rb +4 -0
- data/app/jobs/observ/dataset_runner_job.rb +49 -0
- data/app/mailers/observ/application_mailer.rb +6 -0
- data/app/models/concerns/observ/agent_phaseable.rb +124 -0
- data/app/models/concerns/observ/agent_selectable.rb +50 -0
- data/app/models/concerns/observ/chat_enhancements.rb +109 -0
- data/app/models/concerns/observ/message_enhancements.rb +31 -0
- data/app/models/concerns/observ/observability_instrumentation.rb +124 -0
- data/app/models/concerns/observ/prompt_management.rb +320 -0
- data/app/models/concerns/observ/trace_association.rb +9 -0
- data/app/models/observ/annotation.rb +23 -0
- data/app/models/observ/application_record.rb +5 -0
- data/app/models/observ/dataset.rb +51 -0
- data/app/models/observ/dataset_item.rb +41 -0
- data/app/models/observ/dataset_run.rb +104 -0
- data/app/models/observ/dataset_run_item.rb +111 -0
- data/app/models/observ/generation.rb +56 -0
- data/app/models/observ/null_prompt.rb +59 -0
- data/app/models/observ/observation.rb +38 -0
- data/app/models/observ/prompt.rb +315 -0
- data/app/models/observ/score.rb +51 -0
- data/app/models/observ/session.rb +131 -0
- data/app/models/observ/span.rb +13 -0
- data/app/models/observ/trace.rb +135 -0
- data/app/presenters/observ/agent_select_presenter.rb +59 -0
- data/app/services/observ/agent_executor_service.rb +174 -0
- data/app/services/observ/agent_provider.rb +60 -0
- data/app/services/observ/agent_selection_service.rb +53 -0
- data/app/services/observ/chat_instrumenter.rb +523 -0
- data/app/services/observ/dataset_runner_service.rb +153 -0
- data/app/services/observ/evaluator_runner_service.rb +58 -0
- data/app/services/observ/evaluators/base_evaluator.rb +51 -0
- data/app/services/observ/evaluators/contains_evaluator.rb +53 -0
- data/app/services/observ/evaluators/exact_match_evaluator.rb +23 -0
- data/app/services/observ/evaluators/json_structure_evaluator.rb +44 -0
- data/app/services/observ/prompt_manager/cache_statistics.rb +82 -0
- data/app/services/observ/prompt_manager/caching.rb +167 -0
- data/app/services/observ/prompt_manager/comparison.rb +49 -0
- data/app/services/observ/prompt_manager/version_management.rb +96 -0
- data/app/services/observ/prompt_manager.rb +40 -0
- data/app/services/observ/trace_text_formatter.rb +349 -0
- data/app/validators/observ/prompt_config_validator.rb +187 -0
- data/app/views/kaminari/_first_page.html.erb +11 -0
- data/app/views/kaminari/_gap.html.erb +8 -0
- data/app/views/kaminari/_last_page.html.erb +11 -0
- data/app/views/kaminari/_next_page.html.erb +11 -0
- data/app/views/kaminari/_page.html.erb +12 -0
- data/app/views/kaminari/_paginator.html.erb +25 -0
- data/app/views/kaminari/_prev_page.html.erb +11 -0
- data/app/views/kaminari/observ/_first_page.html.erb +11 -0
- data/app/views/kaminari/observ/_gap.html.erb +8 -0
- data/app/views/kaminari/observ/_last_page.html.erb +11 -0
- data/app/views/kaminari/observ/_next_page.html.erb +11 -0
- data/app/views/kaminari/observ/_page.html.erb +12 -0
- data/app/views/kaminari/observ/_paginator.html.erb +25 -0
- data/app/views/kaminari/observ/_prev_page.html.erb +11 -0
- data/app/views/layouts/observ/application.html.erb +88 -0
- data/app/views/observ/annotations/_annotation.html.erb +13 -0
- data/app/views/observ/annotations/_form.html.erb +28 -0
- data/app/views/observ/annotations/index.html.erb +28 -0
- data/app/views/observ/annotations/sessions_index.html.erb +48 -0
- data/app/views/observ/annotations/traces_index.html.erb +48 -0
- data/app/views/observ/chats/_form.html.erb +45 -0
- data/app/views/observ/chats/index.html.erb +67 -0
- data/app/views/observ/chats/new.html.erb +17 -0
- data/app/views/observ/chats/show.html.erb +34 -0
- data/app/views/observ/dashboard/index.html.erb +236 -0
- data/app/views/observ/dataset_items/_form.html.erb +49 -0
- data/app/views/observ/dataset_items/edit.html.erb +18 -0
- data/app/views/observ/dataset_items/index.html.erb +95 -0
- data/app/views/observ/dataset_items/new.html.erb +18 -0
- data/app/views/observ/dataset_run_items/_score_close_drawer.html.erb +4 -0
- data/app/views/observ/dataset_run_items/_score_drawer.html.erb +75 -0
- data/app/views/observ/dataset_run_items/_score_success.html.erb +29 -0
- data/app/views/observ/dataset_run_items/_scores_cell.html.erb +19 -0
- data/app/views/observ/dataset_run_items/details_drawer.turbo_stream.erb +80 -0
- data/app/views/observ/dataset_run_items/score_drawer.turbo_stream.erb +7 -0
- data/app/views/observ/dataset_runs/index.html.erb +108 -0
- data/app/views/observ/dataset_runs/new.html.erb +57 -0
- data/app/views/observ/dataset_runs/review.html.erb +155 -0
- data/app/views/observ/dataset_runs/show.html.erb +166 -0
- data/app/views/observ/datasets/_form.html.erb +62 -0
- data/app/views/observ/datasets/_items_tab.html.erb +66 -0
- data/app/views/observ/datasets/_runs_tab.html.erb +82 -0
- data/app/views/observ/datasets/edit.html.erb +32 -0
- data/app/views/observ/datasets/index.html.erb +105 -0
- data/app/views/observ/datasets/new.html.erb +18 -0
- data/app/views/observ/datasets/show.html.erb +67 -0
- data/app/views/observ/messages/_content.html.erb +1 -0
- data/app/views/observ/messages/_form.html.erb +33 -0
- data/app/views/observ/messages/_message.html.erb +14 -0
- data/app/views/observ/messages/_tool_calls.html.erb +10 -0
- data/app/views/observ/messages/create.turbo_stream.erb +9 -0
- data/app/views/observ/observations/index.html.erb +97 -0
- data/app/views/observ/observations/show_generation.html.erb +195 -0
- data/app/views/observ/observations/show_span.html.erb +93 -0
- data/app/views/observ/prompts/_diff_content.html.erb +16 -0
- data/app/views/observ/prompts/_form.html.erb +111 -0
- data/app/views/observ/prompts/_new_form.html.erb +102 -0
- data/app/views/observ/prompts/_prompt_actions.html.erb +4 -0
- data/app/views/observ/prompts/_prompt_content_highlighted.html.erb +4 -0
- data/app/views/observ/prompts/_version_actions.html.erb +40 -0
- data/app/views/observ/prompts/compare.html.erb +155 -0
- data/app/views/observ/prompts/edit.html.erb +17 -0
- data/app/views/observ/prompts/index.html.erb +108 -0
- data/app/views/observ/prompts/new.html.erb +17 -0
- data/app/views/observ/prompts/show.html.erb +138 -0
- data/app/views/observ/prompts/versions.html.erb +87 -0
- data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +25 -0
- data/app/views/observ/sessions/drawer_test.turbo_stream.erb +49 -0
- data/app/views/observ/sessions/index.html.erb +91 -0
- data/app/views/observ/sessions/show.html.erb +251 -0
- data/app/views/observ/traces/add_to_dataset_drawer.turbo_stream.erb +48 -0
- data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +25 -0
- data/app/views/observ/traces/index.html.erb +87 -0
- data/app/views/observ/traces/show.html.erb +285 -0
- data/app/views/observ/traces/text_output_drawer.turbo_stream.erb +48 -0
- data/app/views/shared/_drawer.html.erb +26 -0
- data/config/routes.rb +80 -0
- data/db/migrate/001_create_observ_sessions.rb +21 -0
- data/db/migrate/002_create_observ_traces.rb +25 -0
- data/db/migrate/003_create_observ_observations.rb +42 -0
- data/db/migrate/004_add_message_id_to_observ_traces.rb +7 -0
- data/db/migrate/005_create_observ_prompts.rb +21 -0
- data/db/migrate/006_fix_prompt_config_strings.rb +23 -0
- data/db/migrate/007_create_observ_annotations.rb +12 -0
- data/db/migrate/009_add_prompt_fields_to_observ_chats.rb +11 -0
- data/db/migrate/010_create_observ_datasets.rb +15 -0
- data/db/migrate/011_create_observ_dataset_items.rb +17 -0
- data/db/migrate/012_create_observ_dataset_runs.rb +22 -0
- data/db/migrate/013_create_observ_dataset_run_items.rb +16 -0
- data/db/migrate/014_create_observ_scores.rb +26 -0
- data/lib/generators/observ/add_phase_tracking/add_phase_tracking_generator.rb +150 -0
- data/lib/generators/observ/add_phase_tracking/templates/migration.rb.tt +6 -0
- data/lib/generators/observ/install/USAGE +27 -0
- data/lib/generators/observ/install/install_generator.rb +270 -0
- data/lib/generators/observ/install_chat/install_chat_generator.rb +313 -0
- data/lib/generators/observ/install_chat/templates/agents/base_agent.rb.tt +147 -0
- data/lib/generators/observ/install_chat/templates/agents/simple_agent.rb.tt +55 -0
- data/lib/generators/observ/install_chat/templates/concerns/observ_chat_enhancements.rb.tt +34 -0
- data/lib/generators/observ/install_chat/templates/concerns/observ_message_enhancements.rb.tt +18 -0
- data/lib/generators/observ/install_chat/templates/initializers/observability.rb.tt +20 -0
- data/lib/generators/observ/install_chat/templates/jobs/chat_response_job.rb.tt +56 -0
- data/lib/generators/observ/install_chat/templates/migrations/add_agent_class_name.rb.tt +6 -0
- data/lib/generators/observ/install_chat/templates/migrations/add_observability_session_id.rb.tt +6 -0
- data/lib/generators/observ/install_chat/templates/tools/think_tool.rb.tt +29 -0
- data/lib/generators/observ/install_chat/templates/views/messages/_content.html.erb.tt +1 -0
- data/lib/observ/asset_installer.rb +130 -0
- data/lib/observ/asset_syncer.rb +104 -0
- data/lib/observ/configuration.rb +108 -0
- data/lib/observ/engine.rb +50 -0
- data/lib/observ/index_file_generator.rb +142 -0
- data/lib/observ/instrumenter/ruby_llm.rb +6 -0
- data/lib/observ/version.rb +3 -0
- data/lib/observ.rb +29 -0
- data/lib/tasks/observ_tasks.rake +75 -0
- metadata +453 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
class EvaluatorRunnerService
|
|
5
|
+
BUILT_IN_EVALUATORS = {
|
|
6
|
+
"exact_match" => Evaluators::ExactMatchEvaluator,
|
|
7
|
+
"contains" => Evaluators::ContainsEvaluator,
|
|
8
|
+
"json_structure" => Evaluators::JsonStructureEvaluator
|
|
9
|
+
}.freeze
|
|
10
|
+
|
|
11
|
+
attr_reader :dataset_run, :evaluator_configs
|
|
12
|
+
|
|
13
|
+
# Builds a runner for a single dataset run.
#
# @param dataset_run [Object] the run whose items will be evaluated
# @param evaluator_configs [Array<Hash>, nil] evaluator definitions; when
#   nil (or false) the default configuration is used instead
def initialize(dataset_run, evaluator_configs: nil)
  @dataset_run = dataset_run
  @evaluator_configs = evaluator_configs
  @evaluator_configs ||= default_evaluator_configs
end
|
|
17
|
+
|
|
18
|
+
# Runs the configured evaluators against every succeeded item of the run.
#
# @return [Object, nil] the dataset run, or nil when no evaluator configs are set
def call
  return if evaluator_configs.blank?

  # Associations are preloaded because evaluators read the item and its trace.
  items = dataset_run.run_items.includes(:dataset_item, :trace)
  items.find_each do |item|
    evaluate_item(item) if item.succeeded?
  end

  dataset_run
end
|
|
29
|
+
|
|
30
|
+
# Applies each configured evaluator to one run item.
#
# A failure in one evaluator is logged and does not stop the remaining ones.
#
# @param run_item [Object] the dataset run item to score
# @return [Array<Hash>] the evaluator configs that were iterated
def evaluate_item(run_item)
  evaluator_configs.each do |evaluator_config|
    begin
      built = build_evaluator(evaluator_config)
      built.call(run_item) if built
    rescue StandardError => error
      Rails.logger.error("Evaluator #{evaluator_config['type']} failed for run_item #{run_item.id}: #{error.message}")
    end
  end
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Evaluator configuration used when the caller supplies none:
# a single exact-match evaluator.
#
# @return [Array<Hash>]
def default_evaluator_configs
  exact_match = { "type" => "exact_match" }
  [ exact_match ]
end
|
|
47
|
+
|
|
48
|
+
# Instantiates the built-in evaluator described by a config hash.
#
# The "type" entry selects the class from BUILT_IN_EVALUATORS; every other
# entry is forwarded to the evaluator constructor as a keyword argument.
# Keys (and the type value) may be strings or symbols.
#
# @param config [Hash] evaluator definition, e.g. { "type" => "contains", "keywords" => [...] }
# @return [Object, nil] an evaluator instance, or nil when the type is unknown
def build_evaluator(config)
  # Normalize through a plain Hash so symbol-keyed configs are not silently skipped.
  normalized = config.to_hash.transform_keys(&:to_s)
  evaluator_class = BUILT_IN_EVALUATORS[normalized["type"].to_s]
  return nil unless evaluator_class

  options = normalized.reject { |key, _value| key == "type" }.transform_keys(&:to_sym)
  evaluator_class.new(**options)
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
module Evaluators
|
|
5
|
+
class BaseEvaluator
|
|
6
|
+
attr_reader :name, :options
|
|
7
|
+
|
|
8
|
+
# @param name [String, nil] score name; falls back to #default_name when not given
# @param options [Hash] evaluator-specific settings, exposed through #options
def initialize(name: nil, **options)
  if name
    @name = name
  else
    @name = default_name
  end
  @options = options
end
|
|
12
|
+
|
|
13
|
+
# Override in subclasses
|
|
14
|
+
# Abstract hook: concrete evaluators compute the score value here.
#
# @param run_item [Object] the dataset run item being scored (unused here)
# @raise [NotImplementedError] always, in this base implementation
def evaluate(run_item)
  message = "Subclasses must implement #evaluate"
  raise NotImplementedError, message
end
|
|
17
|
+
|
|
18
|
+
# Creates and persists a score for the run item
|
|
19
|
+
# Evaluates the run item and stores the result as a score.
#
# @param run_item [Object] the dataset run item to score
# @return [Object, nil] the saved score; nil when the item has no trace or
#   #evaluate returns nil
def call(run_item)
  return nil if run_item.trace.blank?

  computed = evaluate(run_item)
  computed.nil? ? nil : create_or_update_score(run_item, computed)
end
|
|
27
|
+
|
|
28
|
+
protected
|
|
29
|
+
|
|
30
|
+
# Derives the score name from the class name: namespace removed,
# underscored, with a trailing "_evaluator" stripped.
#
# @return [String]
def default_name
  unqualified = self.class.name.demodulize
  unqualified.underscore.sub(/_evaluator$/, "")
end
|
|
33
|
+
|
|
34
|
+
# Data type recorded on scores written by this evaluator.
#
# @return [Symbol] :numeric
def data_type
  :numeric
end
|
|
37
|
+
|
|
38
|
+
# Finds (or initializes) the programmatic score with this evaluator's name on
# the run item, assigns the new value and saves it.
#
# @param run_item [Object] the dataset run item owning the score
# @param value [Object] the value returned by #evaluate
# @return [Object] the saved score
def create_or_update_score(run_item, value)
  lookup = { name: name, source: :programmatic }

  run_item.scores.find_or_initialize_by(lookup).tap do |record|
    record.assign_attributes(
      trace: run_item.trace,
      value: value,
      data_type: data_type,
      comment: options[:comment]
    )
    record.save!
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
module Evaluators
|
|
5
|
+
class ContainsEvaluator < BaseEvaluator
|
|
6
|
+
# Scores the fraction of keywords found (case-insensitively) in the actual output.
#
# Keywords come from options[:keywords] or, when absent, from the item's
# expected output. A single keyword may be given instead of a list, and
# non-String keywords are compared by their string form.
#
# @param run_item [Object] the dataset run item being scored
# @return [Float, nil] ratio in 0.0..1.0; nil when there are no keywords
def evaluate(run_item)
  raw_keywords = options[:keywords] || extract_keywords_from_expected(run_item)
  return nil if raw_keywords.blank?

  keywords = Array(raw_keywords)
  output = normalize_output(run_item.actual_output)
  return 0.0 if output.blank?

  # Downcase the output once rather than once per keyword.
  haystack = output.downcase
  matched = keywords.count { |keyword| haystack.include?(keyword.to_s.downcase) }
  matched.to_f / keywords.size
end
|
|
16
|
+
|
|
17
|
+
protected
|
|
18
|
+
|
|
19
|
+
# Score name used when none is supplied to the constructor.
#
# @return [String]
def default_name
  "contains"
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
# Derives the keyword list from the item's expected output.
#
# A Hash yields its "keywords"/:keywords entry, an Array is used as-is, a
# String becomes a one-element list; anything else (or blank) yields [].
#
# @param run_item [Object] the dataset run item
# @return [Array] keywords to look for
def extract_keywords_from_expected(run_item)
  expected = run_item.expected_output
  return [] if expected.blank?

  return expected["keywords"] || expected[:keywords] || [] if expected.is_a?(Hash)
  return expected if expected.is_a?(Array)

  expected.is_a?(String) ? [ expected ] : []
end
|
|
40
|
+
|
|
41
|
+
# Converts the actual output to a String for substring matching:
# a Hash is serialized to JSON, a String is returned unchanged, and any
# other value goes through #to_s.
#
# @param output [Object] the run item's actual output
# @return [String]
def normalize_output(output)
  return output.to_json if output.is_a?(Hash)

  output.is_a?(String) ? output : output.to_s
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
module Evaluators
|
|
5
|
+
class ExactMatchEvaluator < BaseEvaluator
|
|
6
|
+
# Scores 1.0 when the run item reports that its output matches, else 0.0.
#
# @param run_item [Object] the dataset run item being scored
# @return [Float, nil] nil when the item has no expected output
def evaluate(run_item)
  return if run_item.expected_output.blank?

  if run_item.output_matches?
    1.0
  else
    0.0
  end
end
|
|
11
|
+
|
|
12
|
+
protected
|
|
13
|
+
|
|
14
|
+
# Data type recorded on scores written by this evaluator.
#
# @return [Symbol] :boolean
def data_type
  :boolean
end
|
|
17
|
+
|
|
18
|
+
# Score name used when none is supplied to the constructor.
#
# @return [String]
def default_name
  "exact_match"
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
module Evaluators
|
|
5
|
+
class JsonStructureEvaluator < BaseEvaluator
|
|
6
|
+
# Scores the fraction of required keys present in the parsed output.
#
# Required keys come from options[:required_keys] or, when absent, from the
# keys of the item's expected output. Each key is looked up both as a String
# and as a Symbol.
#
# @param run_item [Object] the dataset run item being scored
# @return [Float, nil] ratio in 0.0..1.0; 0.0 when the output cannot be
#   parsed; nil when there are no required keys
def evaluate(run_item)
  wanted = options[:required_keys] || extract_keys_from_expected(run_item)
  return nil if wanted.blank?

  parsed = parse_output(run_item.actual_output)
  return 0.0 if parsed.nil?

  key_present = ->(key) { parsed.key?(key.to_s) || parsed.key?(key.to_sym) }
  wanted.count(&key_present).to_f / wanted.size
end
|
|
16
|
+
|
|
17
|
+
protected
|
|
18
|
+
|
|
19
|
+
# Score name used when none is supplied to the constructor.
#
# @return [String]
def default_name
  "json_structure"
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
# Uses the keys of the expected output as the required keys.
#
# @param run_item [Object] the dataset run item
# @return [Array<String>] stringified keys, or [] when the expected output
#   is not a Hash
def extract_keys_from_expected(run_item)
  expected = run_item.expected_output
  expected.is_a?(Hash) ? expected.keys.map { |key| key.to_s } : []
end
|
|
31
|
+
|
|
32
|
+
# Turns the actual output into a Hash whose keys can be inspected.
#
# A Hash is returned as-is and a String is parsed as JSON. Anything that is
# not (or does not parse to) a JSON object yields nil, so the caller can
# score it 0.0 instead of failing on a missing #key?.
#
# @param output [Object] the run item's actual output
# @return [Hash, nil]
def parse_output(output)
  return output if output.is_a?(Hash)
  return nil unless output.is_a?(String)

  parsed = JSON.parse(output)
  # Valid JSON may still be an array or a scalar; only objects have keys.
  parsed.is_a?(Hash) ? parsed : nil
rescue JSON::JSONError, EncodingError
  nil
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
class PromptManager
|
|
5
|
+
# Concern for cache statistics tracking and reporting.
|
|
6
|
+
# Handles hit/miss tracking and statistics aggregation.
|
|
7
|
+
module CacheStatistics
|
|
8
|
+
# ============================================
|
|
9
|
+
# CACHE STATISTICS
|
|
10
|
+
# ============================================
|
|
11
|
+
|
|
12
|
+
# Get cache statistics for a prompt
|
|
13
|
+
# @param name [String] The prompt name
|
|
14
|
+
# @return [Hash] Statistics hash with :name, :hits, :misses, :total, :hit_rate
|
|
15
|
+
def cache_stats(name)
  stats_prefix = "#{Observ.config.prompt_cache_namespace}:stats:#{name}"

  # A counter that was never written (or has expired) reads as zero.
  hits = Rails.cache.read("#{stats_prefix}:hits") || 0
  misses = Rails.cache.read("#{stats_prefix}:misses") || 0
  total = hits + misses

  # Percentage rounded to two decimals; stays 0 when nothing was recorded.
  hit_rate = 0
  hit_rate = (hits.to_f / total * 100).round(2) if total > 0

  { name: name, hits: hits, misses: misses, total: total, hit_rate: hit_rate }
end
|
|
32
|
+
|
|
33
|
+
# Clear all cache statistics
|
|
34
|
+
# @return [Boolean] true if successful
|
|
35
|
+
def clear_stats
  # Counters are keyed by prompt name, so only prompts that currently exist
  # in the database have their hit/miss counters removed.
  Observ::Prompt.distinct.pluck(:name).each do |prompt_name|
    stats_prefix = "#{Observ.config.prompt_cache_namespace}:stats:#{prompt_name}"

    %w[hits misses].each do |counter|
      Rails.cache.delete("#{stats_prefix}:#{counter}")
    end
  end

  Rails.logger.info("Cache statistics cleared")
  true
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# ============================================
|
|
51
|
+
# PRIVATE TRACKING METHODS
|
|
52
|
+
# ============================================
|
|
53
|
+
|
|
54
|
+
# Track a cache hit
|
|
55
|
+
# @param name [String] The prompt name
|
|
56
|
+
# @param state [Symbol] The prompt state
|
|
57
|
+
# @param version [Integer, nil] The prompt version
|
|
58
|
+
def track_cache_hit(name, state, version)
  counter_key = "#{Observ.config.prompt_cache_namespace}:stats:#{name}:hits"

  # Read-then-write through the Rails cache: these are two separate calls,
  # so concurrent hits can overwrite each other's increment. `state` and
  # `version` are accepted but not part of the counter key.
  previous = Rails.cache.read(counter_key) || 0
  Rails.cache.write(counter_key, previous + 1, expires_in: 1.day)
rescue StandardError => error
  # Statistics are best-effort; a cache failure must not break the fetch.
  Rails.logger.error("Failed to track cache hit: #{error.message}")
end
|
|
67
|
+
|
|
68
|
+
# Track a cache miss
|
|
69
|
+
# @param name [String] The prompt name
|
|
70
|
+
# @param state [Symbol] The prompt state
|
|
71
|
+
# @param version [Integer, nil] The prompt version
|
|
72
|
+
def track_cache_miss(name, state, version)
  counter_key = "#{Observ.config.prompt_cache_namespace}:stats:#{name}:misses"

  # Read-then-write, so concurrent misses may overwrite each other's
  # increment. `state` and `version` are not part of the counter key.
  previous = Rails.cache.read(counter_key) || 0
  Rails.cache.write(counter_key, previous + 1, expires_in: 1.day)
rescue StandardError => error
  # Statistics are best-effort; a cache failure must not break the fetch.
  Rails.logger.error("Failed to track cache miss: #{error.message}")
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
class PromptManager
|
|
5
|
+
# Concern for prompt caching operations including cache key generation,
|
|
6
|
+
# fetching with cache, invalidation, and cache warming.
|
|
7
|
+
module Caching
|
|
8
|
+
# ============================================
|
|
9
|
+
# CACHE KEY MANAGEMENT
|
|
10
|
+
# ============================================
|
|
11
|
+
|
|
12
|
+
# Enhanced cache key strategy
|
|
13
|
+
# @param name [String] The prompt name
|
|
14
|
+
# @param state [Symbol, nil] The prompt state (:draft, :production, :archived)
|
|
15
|
+
# @param version [Integer, nil] The prompt version number
|
|
16
|
+
# @return [String] The cache key for the prompt
|
|
17
|
+
def cache_key(name:, state: nil, version: nil)
  # A version takes precedence over a state; with neither, the key falls
  # back to the plain "production" suffix.
  suffix =
    if version
      "version:#{version}"
    elsif state
      "state:#{state}"
    else
      "production"
    end

  "#{Observ.config.prompt_cache_namespace}:#{name}:#{suffix}"
end
|
|
28
|
+
|
|
29
|
+
# ============================================
|
|
30
|
+
# PROMPT FETCHING WITH ADVANCED CACHING
|
|
31
|
+
# ============================================
|
|
32
|
+
|
|
33
|
+
# Fetch single prompt with advanced caching
|
|
34
|
+
# @param name [String] The prompt name
|
|
35
|
+
# @param state [Symbol] The prompt state (default: :production)
|
|
36
|
+
# @param version [Integer, nil] Specific version to fetch
|
|
37
|
+
# @param fallback [String, nil] Fallback text if prompt not found
|
|
38
|
+
# @return [Observ::Prompt, Observ::NullPrompt] The fetched prompt or NullPrompt with fallback
|
|
39
|
+
# @raise [PromptNotFoundError] when the prompt does not exist and no fallback is given
def fetch(name:, state: :production, version: nil, fallback: nil)
  return fetch_from_db(name: name, state: state, version: version, fallback: fallback) unless caching_enabled?

  monitoring = Observ.config.prompt_cache_monitoring_enabled
  key = cache_key(name: name, state: state, version: version)

  # Stays true only when the cache block below never runs, i.e. the value was
  # served from the cache. This avoids a separate exist? round trip and cannot
  # disagree with what fetch actually did.
  served_from_cache = true

  # NOTE(review): a NullPrompt built from `fallback` is cached like a real
  # prompt, so later callers with a different fallback receive the first one
  # until the entry expires or is invalidated - confirm this is intended.
  result = Rails.cache.fetch(key, expires_in: Observ.config.prompt_cache_ttl) do
    served_from_cache = false
    fetch_from_db(name: name, state: state, version: version, fallback: fallback).tap do |prompt|
      # Only real prompts (not NullPrompt) count as misses.
      track_cache_miss(name, state, version) if monitoring && prompt && !prompt.is_a?(NullPrompt)
    end
  end

  # Only real prompts (not NullPrompt) count as hits.
  if served_from_cache && monitoring && result && !result.is_a?(NullPrompt)
    track_cache_hit(name, state, version)
  end

  result
rescue PromptNotFoundError
  # A missing prompt is not a cache failure: do not log it as one and do not
  # query the database a second time.
  raise
rescue => e
  Rails.logger.error("Cache fetch failed for #{name}: #{e.message}")
  fetch_from_db(name: name, state: state, version: version, fallback: fallback)
end
|
|
66
|
+
|
|
67
|
+
# Fetch multiple prompts at once
|
|
68
|
+
# @param names [Array<String>] The prompt names to fetch
|
|
69
|
+
# @param state [Symbol] The prompt state (default: :production)
|
|
70
|
+
# @return [Hash] Hash of prompt names to prompt objects
|
|
71
|
+
def fetch_all(names:, state: :production)
  # Queries the database directly (the cache is not consulted). When several
  # rows share a name, the one iterated last is kept.
  matching = Prompt.where(name: names, state: state)
  matching.each_with_object({}) { |prompt, by_name| by_name[prompt.name] = prompt }
end
|
|
74
|
+
|
|
75
|
+
# ============================================
|
|
76
|
+
# CACHE INVALIDATION
|
|
77
|
+
# ============================================
|
|
78
|
+
|
|
79
|
+
# Invalidate cache for a prompt
|
|
80
|
+
# @param name [String] The prompt name
|
|
81
|
+
# @param version [Integer, nil] Specific version to invalidate (nil = all states)
|
|
82
|
+
# @return [Boolean] true if successful
|
|
83
|
+
def invalidate_cache(name:, version: nil)
  if version
    # Only the version-specific entry is removed.
    Rails.cache.delete(cache_key(name: name, version: version))
  else
    # Remove every state-based entry for this prompt.
    %i[draft production archived].each do |state|
      Rails.cache.delete(cache_key(name: name, state: state))
    end
  end

  version_suffix = version ? " v#{version}" : ""
  Rails.logger.info("Cache invalidated for #{name}#{version_suffix}")

  true
end
|
|
96
|
+
|
|
97
|
+
# ============================================
|
|
98
|
+
# CACHE WARMING
|
|
99
|
+
# ============================================
|
|
100
|
+
|
|
101
|
+
# Warm cache for critical prompts
|
|
102
|
+
# @param prompt_names [Array<String>, nil] Specific prompts to warm (nil = all critical)
|
|
103
|
+
# @return [Hash] Hash with :success and :failed arrays
|
|
104
|
+
def warm_cache(prompt_names = nil)
  outcome = { success: [], failed: [] }

  (prompt_names || critical_prompt_names).each do |prompt_name|
    # Fetching the production version populates the cache as a side effect.
    fetch(name: prompt_name, state: :production)
    outcome[:success] << prompt_name
  rescue => error
    # One failing prompt must not stop the remaining ones from being warmed.
    outcome[:failed] << { name: prompt_name, error: error.message }
    Rails.logger.error("Failed to warm cache for #{prompt_name}: #{error.message}")
  end

  Rails.logger.info("Cache warming completed: #{outcome[:success].count} success, #{outcome[:failed].count} failed")
  outcome
end
|
|
123
|
+
|
|
124
|
+
# Get list of critical prompts (prompts used by agents)
|
|
125
|
+
# @return [Array<String>] Array of prompt names
|
|
126
|
+
def critical_prompt_names
  configured = Observ.config.prompt_cache_critical_prompts

  if configured.any?
    configured
  else
    # Nothing configured: fall back to every prompt name that has a production version.
    Observ::Prompt.where(state: :production).distinct.pluck(:name)
  end
end
|
|
132
|
+
|
|
133
|
+
private
|
|
134
|
+
|
|
135
|
+
# ============================================
|
|
136
|
+
# PRIVATE HELPER METHODS
|
|
137
|
+
# ============================================
|
|
138
|
+
|
|
139
|
+
# Check if caching is enabled
|
|
140
|
+
# @return [Boolean]
|
|
141
|
+
def caching_enabled?
  # Caching is on only when a TTL is configured and greater than zero.
  ttl = Observ.config.prompt_cache_ttl
  ttl.present? && ttl > 0
end
|
|
144
|
+
|
|
145
|
+
# Fetch prompt from database
|
|
146
|
+
# @param name [String] The prompt name
|
|
147
|
+
# @param state [Symbol] The prompt state
|
|
148
|
+
# @param version [Integer, nil] Specific version to fetch
|
|
149
|
+
# @param fallback [String, nil] Fallback text if not found
|
|
150
|
+
# @return [Observ::Prompt, Observ::NullPrompt] The prompt or NullPrompt
|
|
151
|
+
# @raise [PromptNotFoundError] If prompt not found and no fallback provided
|
|
152
|
+
# @raise [ArgumentError] If no version is given and state is not draft, production or archived
def fetch_from_db(name:, state:, version:, fallback:)
  query = Observ::Prompt.where(name: name)

  prompt =
    if version.present?
      query.find_by(version: version)
    else
      # `state` is dispatched as a method name on the relation, so it is
      # restricted to the known prompt states; any other value could invoke
      # an arbitrary public relation method (for example delete_all).
      unless %w[draft production archived].include?(state.to_s)
        raise ArgumentError, "Unknown prompt state: #{state.inspect}"
      end

      query.public_send(state).first
    end

  return prompt if prompt
  return NullPrompt.new(name: name, fallback_text: fallback) if fallback

  raise PromptNotFoundError, "Prompt '#{name}' not found"
end
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
|
|
4
|
+
class PromptManager
|
|
5
|
+
# Concern for comparing different versions of prompts.
|
|
6
|
+
# Provides diff functionality to highlight changes between versions.
|
|
7
|
+
module Comparison
|
|
8
|
+
# ============================================
|
|
9
|
+
# VERSION COMPARISON
|
|
10
|
+
# ============================================
|
|
11
|
+
|
|
12
|
+
# Compare two versions of a prompt
|
|
13
|
+
# @param name [String] The prompt name
|
|
14
|
+
# @param version_a [Integer] First version number
|
|
15
|
+
# @param version_b [Integer] Second version number
|
|
16
|
+
# @return [Hash] Hash with :from, :to, and :diff keys
|
|
17
|
+
def compare_versions(name:, version_a:, version_b:)
  # find_by! raises when either version does not exist, so both records are
  # guaranteed to be present before diffing.
  from, to = [ version_a, version_b ].map do |number|
    Prompt.find_by!(name: name, version: number)
  end

  { from: from, to: to, diff: calculate_diff(from.prompt, to.prompt) }
end
|
|
27
|
+
|
|
28
|
+
private
|
|
29
|
+
|
|
30
|
+
# ============================================
|
|
31
|
+
# PRIVATE DIFF CALCULATION
|
|
32
|
+
# ============================================
|
|
33
|
+
|
|
34
|
+
# Calculate diff between two text strings
|
|
35
|
+
# @param text_a [String] First text
|
|
36
|
+
# @param text_b [String] Second text
|
|
37
|
+
# @return [Hash] Hash with :added_lines, :removed_lines, and :changed keys
|
|
38
|
+
def calculate_diff(text_a, text_b)
|
|
39
|
+
# Simple line-by-line diff
|
|
40
|
+
# In production, consider using 'diff-lcs' gem for better diffs
|
|
41
|
+
{
|
|
42
|
+
added_lines: text_b.lines - text_a.lines,
|
|
43
|
+
removed_lines: text_a.lines - text_b.lines,
|
|
44
|
+
changed: text_a != text_b
|
|
45
|
+
}
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  class PromptManager
    # Concern grouping the version lifecycle of prompts: creating versions,
    # listing them, and moving them between states (promote, demote,
    # restore, rollback). Every method delegates to the Prompt model.
    module VersionManagement
      # ============================================
      # PROMPT CREATION
      # ============================================

      # Create a new version of a prompt by delegating to Prompt.create_version.
      #
      # @param name [String] The prompt name
      # @param prompt [String] The prompt content
      # @param config [Hash] Configuration options (default: {})
      # @param commit_message [String, nil] Optional commit message
      # @param created_by [String, nil] Optional creator identifier
      # @param promote_to_production [Boolean] Whether to promote immediately (default: false)
      # @return [Observ::Prompt] The newly created prompt
      def create(name:, prompt:, config: {}, commit_message: nil, created_by: nil, promote_to_production: false)
        attributes = {
          name: name,
          prompt: prompt,
          config: config,
          commit_message: commit_message,
          created_by: created_by,
          promote_to_production: promote_to_production
        }

        Prompt.create_version(**attributes)
      end

      # ============================================
      # VERSION QUERIES
      # ============================================

      # List every version stored under a prompt name.
      #
      # @param name [String] The prompt name
      # @return [ActiveRecord::Relation] Versions ordered by version number descending
      def versions(name:)
        matching = Prompt.where(name: name)
        matching.order(version: :desc)
      end

      # ============================================
      # STATE TRANSITIONS
      # ============================================

      # Roll back to a specific version.
      #
      # An archived version is restored; a version that is already in
      # production is returned untouched; anything else is rejected.
      #
      # @param name [String] The prompt name
      # @param to_version [Integer] The version number to rollback to
      # @return [Observ::Prompt] The rolled back prompt
      # @raise [StateTransitionError] If the version is neither archived nor production
      def rollback(name:, to_version:)
        target = Prompt.find_by!(name: name, version: to_version)

        if target.archived?
          target.restore!
          return target
        end

        # Already production, nothing to do
        return target if target.production?

        raise StateTransitionError, "Cannot rollback to draft version"
      end

      # Promote a specific version to production.
      # Only a draft version is transitioned; other states are returned as-is.
      #
      # @param name [String] The prompt name
      # @param version [Integer] The version number to promote
      # @return [Observ::Prompt] The looked-up prompt (promoted if it was a draft)
      def promote(name:, version:)
        Prompt.find_by!(name: name, version: version).tap do |record|
          record.promote! if record.draft?
        end
      end

      # Demote a production version to archived.
      # Only a production version is transitioned; other states are returned as-is.
      #
      # @param name [String] The prompt name
      # @param version [Integer] The version number to demote
      # @return [Observ::Prompt] The looked-up prompt (demoted if it was production)
      def demote(name:, version:)
        Prompt.find_by!(name: name, version: version).tap do |record|
          record.demote! if record.production?
        end
      end

      # Restore an archived version to production.
      # Only an archived version is transitioned; other states are returned as-is.
      #
      # @param name [String] The prompt name
      # @param version [Integer] The version number to restore
      # @return [Observ::Prompt] The looked-up prompt (restored if it was archived)
      def restore(name:, version:)
        Prompt.find_by!(name: name, version: version).tap do |record|
          record.restore! if record.archived?
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Observ
  # PromptManager is the high-level entry point for working with prompts:
  # fetching with caching, version management, and version comparison.
  #
  # Its behaviour lives in concerns that are mixed in at the class level:
  # - Caching: Cache operations, invalidation, and warming
  # - CacheStatistics: Hit/miss tracking and reporting
  # - VersionManagement: CRUD and state transition operations
  # - Comparison: Version comparison and diffing
  #
  # @example Fetching a prompt with caching
  #   prompt = PromptManager.fetch(name: 'my-prompt', state: :production)
  #
  # @example Creating and promoting a new version
  #   prompt = PromptManager.create(
  #     name: 'my-prompt',
  #     prompt: 'Hello {{name}}',
  #     config: { model: 'gpt-4o' },
  #     promote_to_production: true
  #   )
  #
  # @example Cache management
  #   PromptManager.warm_cache(['prompt1', 'prompt2'])
  #   PromptManager.invalidate_cache(name: 'my-prompt')
  #   stats = PromptManager.cache_stats('my-prompt')
  #
  class PromptManager
    # Each concern is extended onto the class itself (one at a time, in this
    # order), so its methods are called as PromptManager.<method>.
    [Caching, CacheStatistics, VersionManagement, Comparison].each do |concern|
      extend concern
    end
  end

  # Custom exceptions

  # Raised when a prompt version cannot be moved to the requested state.
  class StateTransitionError < StandardError
  end

  # Raised when a prompt lookup finds nothing and no fallback is available.
  class PromptNotFoundError < StandardError
  end
end
|