prompt_canary 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +86 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +45 -0
- data/LICENSE.txt +21 -0
- data/README.md +338 -0
- data/Rakefile +12 -0
- data/app/controllers/prompt_canary/application_controller.rb +6 -0
- data/app/controllers/prompt_canary/dashboard/prompts_controller.rb +69 -0
- data/app/views/layouts/prompt_canary/application.html.erb +42 -0
- data/app/views/prompt_canary/dashboard/prompts/index.html.erb +50 -0
- data/app/views/prompt_canary/dashboard/prompts/show.html.erb +114 -0
- data/config/routes.rb +12 -0
- data/examples/auto_rollback.rb +105 -0
- data/examples/demo.rb +83 -0
- data/exe/prompt_canary +6 -0
- data/lib/generators/prompt_canary/install_generator.rb +39 -0
- data/lib/generators/prompt_canary/templates/create_prompt_canary_calls.rb +62 -0
- data/lib/prompt_canary/adapter_factory.rb +16 -0
- data/lib/prompt_canary/adapters/anthropic.rb +39 -0
- data/lib/prompt_canary/adapters/base.rb +11 -0
- data/lib/prompt_canary/cli/commands/history.rb +63 -0
- data/lib/prompt_canary/cli/commands/status.rb +55 -0
- data/lib/prompt_canary/cli.rb +69 -0
- data/lib/prompt_canary/configuration.rb +31 -0
- data/lib/prompt_canary/deployment.rb +186 -0
- data/lib/prompt_canary/engine.rb +7 -0
- data/lib/prompt_canary/monitor.rb +30 -0
- data/lib/prompt_canary/monitor_job.rb +13 -0
- data/lib/prompt_canary/prompt.rb +13 -0
- data/lib/prompt_canary/prompt_executor.rb +27 -0
- data/lib/prompt_canary/promptable.rb +50 -0
- data/lib/prompt_canary/railtie.rb +28 -0
- data/lib/prompt_canary/recorder.rb +55 -0
- data/lib/prompt_canary/result.rb +18 -0
- data/lib/prompt_canary/rollback_rule.rb +22 -0
- data/lib/prompt_canary/router.rb +61 -0
- data/lib/prompt_canary/storage/active_record_adapter.rb +58 -0
- data/lib/prompt_canary/storage/memory.rb +21 -0
- data/lib/prompt_canary/storage/sqlite.rb +64 -0
- data/lib/prompt_canary/storage_factory.rb +24 -0
- data/lib/prompt_canary/version.rb +5 -0
- data/lib/prompt_canary/version_builder.rb +52 -0
- data/lib/prompt_canary/version_object.rb +63 -0
- data/lib/prompt_canary.rb +101 -0
- data/sig/prompt_canary.rbs +4 -0
- metadata +95 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
class CLI
|
|
5
|
+
module Commands
|
|
6
|
+
module History
|
|
7
|
+
private
|
|
8
|
+
|
|
9
|
+
# Prints the recorded event history for one prompt class, oldest first.
#
# @param args [Array<String>] CLI arguments after the `history` subcommand:
#   the prompt class name plus an optional `--since Nd` flag. The array is
#   consumed in place by the option parser.
# Exits with status 1 when the prompt name is missing or `--since` is not
# of the form `Nd` (a positive day count followed by "d").
def history(args)
  since = nil
  parser = OptionParser.new
  parser.on("--since PERIOD") { |period| since = period }
  parser.parse!(args)

  prompt_name = args.first
  unless prompt_name
    warn "Usage: prompt_canary history PROMPT_CLASS [--since Nd]"
    exit 1
  end

  unless since.nil? || since.match?(/\A[1-9]\d*d\z/)
    warn "Invalid --since format: #{since.inspect}. Expected format: Nd (e.g. 7d)"
    exit 1
  end

  # Load the ActiveRecord-backed models lazily, only if they are not present yet.
  require "prompt_canary/storage/active_record_adapter" unless defined?(PromptCanary::PromptEvent)
  history_scope(prompt_name, since).each do |event|
    puts format_event(event)
  end
end
|
|
29
|
+
|
|
30
|
+
# Builds the query for a prompt's events, ordered by `recorded_at`.
#
# @param prompt_name [String] value matched against the `prompt` column
# @param since_period [String, nil] a validated "Nd" string; when given,
#   only events from the last N days (N * 86,400 seconds) are kept
# @return the (possibly narrowed) PromptEvent query scope
def history_scope(prompt_name, since_period)
  events = PromptCanary::PromptEvent.where(prompt: prompt_name).order(:recorded_at)

  if since_period
    # String#to_i reads the leading digits of "7d" and ignores the suffix.
    window_seconds = since_period.to_i * 24 * 60 * 60
    events.where("recorded_at >= ?", Time.now - window_seconds)
  else
    events
  end
end
|
|
36
|
+
|
|
37
|
+
# Renders one event as a single space-separated line: timestamp, upper-cased
# event name padded to 12 characters, version, status/percent change,
# "[triggered_by]" and the triggering metric. Columns that are nil are left out.
#
# @param event [#recorded_at, #event, #version, #triggered_by] event record
# @return [String]
def format_event(event)
  timestamp = event.recorded_at.strftime("%Y-%m-%d %H:%M")
  kind = event.event.upcase.ljust(12)
  actor = "[#{event.triggered_by}]"

  columns = [timestamp, kind, event.version, event_change(event), actor, event_metric(event)]
  columns.reject(&:nil?).join(" ").strip
end
|
|
46
|
+
|
|
47
|
+
# Describes what an event changed.
#
# A status transition is shown whenever either status field is set; otherwise
# a percent transition is shown when both percent fields are present.
#
# @return [String, nil] "old → new" text, or nil when neither applies
def event_change(event)
  return "#{event.previous_status} → #{event.new_status}" if event.previous_status || event.new_status
  return if event.previous_percent.nil? || event.new_percent.nil?

  "#{event.previous_percent}% → #{event.new_percent}%"
end
|
|
54
|
+
|
|
55
|
+
# Formats the metric that triggered an event as "metric value > threshold".
#
# @return [String, nil] nil when the event has no triggering metric
def event_metric(event)
  metric = event.triggering_metric
  "#{metric} #{event.triggering_value} > #{event.triggering_threshold}" if metric
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
class CLI
|
|
5
|
+
module Commands
|
|
6
|
+
module Status
|
|
7
|
+
private
|
|
8
|
+
|
|
9
|
+
# Prints one line per registered version of a prompt class: name, status
# label and canary traffic share.
#
# @param args [Array<String>] CLI arguments; the first is the prompt class name
# Exits with status 1 when the name is missing or does not resolve to a constant.
def status(args)
  prompt_name = args.first
  unless prompt_name
    warn "Usage: prompt_canary status PROMPT_CLASS"
    exit 1
  end

  begin
    prompt_class = Object.const_get(prompt_name)
  rescue NameError
    warn "Unknown prompt class: #{prompt_name}"
    exit 1
  end

  primary_name = prompt_class.primary_version.name
  puts "#{prompt_name}:"
  prompt_class.versions.each do |version|
    label = version_status(prompt_class.name, version.name, primary_name)
    percent = version.rollout.fetch(:percent, 0)
    traffic = if percent.positive?
                "#{percent}% traffic (canary)"
              else
                "no canary traffic"
              end
    puts " #{version.name.ljust(6)} #{label.ljust(12)} #{traffic}"
  end
end
|
|
32
|
+
|
|
33
|
+
# Picks the label shown for a version. A demotion recorded in the database
# takes precedence over being the primary version.
#
# @return [String] "DEMOTED", "PRIMARY" or "CANDIDATE"
def version_status(prompt_name, version_name, primary_name)
  return "DEMOTED" if demoted_in_db?(prompt_name, version_name)

  version_name == primary_name ? "PRIMARY" : "CANDIDATE"
end
|
|
42
|
+
|
|
43
|
+
# Reports whether a rollout override of 0 exists for the given prompt/version.
#
# Returns false when the RolloutOverride model is not loaded, and also when
# the lookup fails because no connection is established or the statement is
# invalid.
#
# @return [Boolean]
def demoted_in_db?(prompt_name, version_name)
  if defined?(PromptCanary::RolloutOverride)
    zeroed = PromptCanary::RolloutOverride.where(
      prompt: prompt_name, version: version_name, rollout_override: 0
    )
    zeroed.exists?
  else
    false
  end
rescue ::ActiveRecord::ConnectionNotEstablished, ::ActiveRecord::StatementInvalid
  false
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
require_relative "cli/commands/history"
|
|
5
|
+
require_relative "cli/commands/status"
|
|
6
|
+
|
|
7
|
+
module PromptCanary
|
|
8
|
+
class CLI
|
|
9
|
+
include Commands::History
|
|
10
|
+
include Commands::Status
|
|
11
|
+
|
|
12
|
+
# Entry point of the CLI: removes the subcommand from the front of +args+
# and hands the remaining arguments to the matching handler.
#
# @param args [Array<String>] raw command-line arguments (mutated)
# Exits with status 1 for anything other than demote/promote/history/status.
def run(args)
  subcommand = args.shift

  unless %w[demote promote history status].include?(subcommand)
    warn "Unknown command: #{subcommand}"
    exit 1
  end

  # The handlers are private, so dispatch goes through `send`; the name was
  # checked against the fixed list above.
  send(subcommand, args)
end
|
|
24
|
+
|
|
25
|
+
private
|
|
26
|
+
|
|
27
|
+
# CLI handler for `promote PROMPT_CLASS VERSION [--reason REASON]`.
#
# Resolves the prompt class by constant name and delegates to
# PromptCanary.promote. Exits with status 1 when an argument is missing or
# the class name does not resolve.
#
# @param args [Array<String>] arguments after the subcommand (mutated by parsing)
def promote(args)
  reason = nil
  parser = OptionParser.new
  parser.on("--reason REASON") { |text| reason = text }
  parser.parse!(args)

  prompt_name, version_name = args
  unless prompt_name && version_name
    warn "Usage: prompt_canary promote PROMPT_CLASS VERSION [--reason REASON]"
    exit 1
  end

  begin
    prompt_class = Object.const_get(prompt_name)
  rescue NameError
    warn "Unknown prompt class: #{prompt_name}"
    exit 1
  end

  PromptCanary.promote(prompt_class, version_name, reason: reason)
end
|
|
47
|
+
|
|
48
|
+
# CLI handler for `demote PROMPT_CLASS VERSION [--reason REASON]`.
#
# Resolves the prompt class by constant name and delegates to
# PromptCanary.demote. Exits with status 1 when an argument is missing or
# the class name does not resolve.
#
# @param args [Array<String>] arguments after the subcommand (mutated by parsing)
def demote(args)
  reason = nil
  parser = OptionParser.new
  parser.on("--reason REASON") { |text| reason = text }
  parser.parse!(args)

  prompt_name, version_name = args
  unless prompt_name && version_name
    warn "Usage: prompt_canary demote PROMPT_CLASS VERSION [--reason REASON]"
    exit 1
  end

  begin
    prompt_class = Object.const_get(prompt_name)
  rescue NameError
    warn "Unknown prompt class: #{prompt_name}"
    exit 1
  end

  PromptCanary.demote(prompt_class, version_name, reason: reason)
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Holds the gem's two settings and rejects unsupported values at assignment.
class Configuration
  VALID_ADAPTERS = %i[anthropic].freeze
  VALID_STORAGE = %i[memory sqlite active_record].freeze

  attr_reader :adapter, :storage

  # @param value [Symbol] one of VALID_ADAPTERS
  # @raise [ConfigurationError] for any other value
  def adapter=(value)
    ensure_supported!(value, VALID_ADAPTERS, "adapter", "adapters")
    @adapter = value
  end

  # @param value [Symbol] one of VALID_STORAGE
  # @raise [ConfigurationError] for any other value
  def storage=(value)
    ensure_supported!(value, VALID_STORAGE, "storage", "storage")
    @storage = value
  end

  # Checks that both settings have been assigned (adapter first, then storage).
  #
  # @raise [ConfigurationError] naming the first setting that is still nil
  # @return [nil]
  def validate!
    { adapter: adapter, storage: storage }.each do |setting, value|
      raise ConfigurationError, "#{setting} is required" if value.nil?
    end
    nil
  end

  private

  # Raises unless +value+ is a member of +allowed+.
  def ensure_supported!(value, allowed, label, plural)
    return if allowed.include?(value)

    raise ConfigurationError, "Unknown #{label}: #{value.inspect}. Valid #{plural}: #{allowed.join(", ")}"
  end
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# rubocop:disable Metrics/ModuleLength
|
|
5
|
+
module Deployment
|
|
6
|
+
# Sets the canary traffic percentage for one registered version.
#
# With ActiveRecord storage the percentage is persisted as a RolloutOverride
# row and a "canary_set" event is recorded; otherwise the in-memory version
# object is updated through `set_rollout!`.
#
# @param prompt_class [Class] prompt class exposing `name` and `versions`
# @param version_name [String] name of a registered version
# @param percent [Numeric] new traffic share
# @raise [ArgumentError] when the percent check rejects the value
# @raise [UnknownVersionError] when the version is not registered
# @raise [DemotedVersionError] when (ActiveRecord storage) the version
#   currently has a rollout override of 0
def set_canary(prompt_class, version_name, percent)
  assert_valid_canary_percent!(percent)
  assert_version_registered!(prompt_class, version_name)

  unless ar_storage?
    target = prompt_class.versions.find { |v| v.name == version_name }
    return target.set_rollout!(percent)
  end

  require "prompt_canary/storage/active_record_adapter"
  identity = { prompt: prompt_class.name, version: version_name }

  # An override of 0 marks a demoted version; it must be restored first.
  if RolloutOverride.where(**identity, rollout_override: 0).exists?
    raise DemotedVersionError,
          "#{version_name.inspect} is demoted — call restore before adjusting traffic"
  end

  prev_percent = effective_canary_percent(prompt_class, version_name)
  override = RolloutOverride.find_or_initialize_by(**identity)
  override.created_at ||= Time.now
  override.update!(rollout_override: percent)
  record_event(**identity, event: "canary_set",
               previous_percent: prev_percent, new_percent: percent, triggered_by: "manual")
end
|
|
28
|
+
|
|
29
|
+
# Makes a registered version the primary one and publishes
# "prompt_canary.promoted".
#
# With ActiveRecord storage the choice is persisted as a PrimaryOverride row,
# a "promoted" event is recorded for the version, and a "superseded" event is
# recorded for the previous primary when it differs. Otherwise the prompt
# class is updated in memory through `promote_to_primary!`.
#
# @param reason [String, nil] free-text reason stored with the event
# @raise [UnknownVersionError] when the version is not registered
def promote(prompt_class, version_name, reason: nil)
  assert_version_registered!(prompt_class, version_name)
  prompt_name = prompt_class.name

  if ar_storage?
    require "prompt_canary/storage/active_record_adapter"
    # Capture the "before" state prior to writing the override.
    prev_primary = effective_primary_name(prompt_class)
    prev_status = current_version_status(prompt_class, version_name)

    override = PrimaryOverride.find_or_initialize_by(prompt: prompt_name)
    override.created_at ||= Time.now
    override.update!(version: version_name)

    record_event(prompt: prompt_name, version: version_name, event: "promoted",
                 previous_status: prev_status, new_status: "primary",
                 reason: reason, triggered_by: "manual")
    record_superseded_event(prompt_class, prev_primary, version_name)
  else
    prompt_class.promote_to_primary!(version_name)
  end

  publish("prompt_canary.promoted", prompt: prompt_name, version: version_name, reason: reason)
end
|
|
48
|
+
|
|
49
|
+
# Stops traffic to a version and publishes "prompt_canary.demoted".
#
# With ActiveRecord storage a RolloutOverride of 0 is written and a "demoted"
# event (including the triggering metric details, if any) is recorded.
# Otherwise the in-memory version object, if found, is demoted via `demote!`.
#
# @param reason [String, nil] free-text reason stored with the event
# @param triggered_by [String] origin of the demotion (defaults to "manual")
# @param triggering_metric [#to_s, nil] metric that caused the demotion
# @param triggering_value [Numeric, nil] observed metric value
# @param triggering_threshold [Numeric, nil] threshold that was crossed
# @raise [CannotDemotePrimaryError] when the primary-demotion check rejects it
def demote(prompt_class, version_name, reason: nil, triggered_by: "manual",
           triggering_metric: nil, triggering_value: nil, triggering_threshold: nil)
  assert_can_demote_primary!(prompt_class, version_name)
  prompt_name = prompt_class.name

  if ar_storage?
    require "prompt_canary/storage/active_record_adapter"
    prev_percent = effective_canary_percent(prompt_class, version_name)

    override = RolloutOverride.find_or_initialize_by(prompt: prompt_name, version: version_name)
    override.created_at ||= Time.now
    override.update!(rollout_override: 0)

    event = {
      prompt: prompt_name, version: version_name, event: "demoted",
      previous_percent: prev_percent, new_percent: 0, new_status: "demoted",
      reason: reason, triggered_by: triggered_by,
      triggering_metric: triggering_metric&.to_s,
      triggering_value: triggering_value,
      triggering_threshold: triggering_threshold
    }
    record_event(event)
  else
    target = prompt_class.versions.find { |v| v.name == version_name }
    target&.demote!
  end

  publish("prompt_canary.demoted", prompt: prompt_name, version: version_name, reason: reason)
end
|
|
73
|
+
|
|
74
|
+
# Lifts a demotion and publishes "prompt_canary.restored".
#
# With ActiveRecord storage every RolloutOverride row for the prompt/version
# pair is deleted and a "restored" event is recorded. Otherwise the in-memory
# version object, if found, is restored via `restore!`.
def restore(prompt_class, version_name)
  identity = { prompt: prompt_class.name, version: version_name }

  if ar_storage?
    require "prompt_canary/storage/active_record_adapter"
    RolloutOverride.where(**identity).delete_all
    record_event(**identity, event: "restored",
                 previous_status: "demoted", new_status: "candidate", triggered_by: "manual")
  else
    target = prompt_class.versions.find { |v| v.name == version_name }
    target&.restore!
  end

  publish("prompt_canary.restored", **identity)
end
|
|
86
|
+
|
|
87
|
+
private
|
|
88
|
+
|
|
89
|
+
# Records a "superseded" event for the version that stopped being primary.
# Does nothing when there was no previous primary or when the promoted
# version already was the primary.
def record_superseded_event(prompt_class, prev_primary, promoted_name)
  return if !prev_primary || prev_primary == promoted_name

  record_event(
    prompt: prompt_class.name,
    version: prev_primary,
    event: "superseded",
    previous_status: "primary",
    new_status: "candidate",
    triggered_by: "manual"
  )
end
|
|
95
|
+
|
|
96
|
+
# Classifies a version as "primary", "demoted" or "candidate".
#
# "demoted" means a RolloutOverride of 0 exists for the version. If the
# lookup fails because no connection is established or the statement is
# invalid, the version is reported as "candidate".
#
# @return [String]
def current_version_status(prompt_class, version_name)
  if effective_primary_name(prompt_class) == version_name
    "primary"
  elsif RolloutOverride.where(prompt: prompt_class.name, version: version_name, rollout_override: 0).exists?
    "demoted"
  else
    "candidate"
  end
rescue ::ActiveRecord::ConnectionNotEstablished, ::ActiveRecord::StatementInvalid
  "candidate"
end
|
|
106
|
+
|
|
107
|
+
# Guard: passes silently when +version_name+ is among the prompt class's
# registered versions.
#
# @raise [UnknownVersionError] otherwise
def assert_version_registered!(prompt_class, version_name)
  registered_names = prompt_class.versions.map(&:name)
  return if registered_names.include?(version_name)

  raise UnknownVersionError, "#{version_name.inspect} is not a registered version of #{prompt_class}"
end
|
|
112
|
+
|
|
113
|
+
# Guard: a canary percentage must be strictly greater than zero.
#
# Zero is rejected because stopping traffic is the job of `demote`. Negative
# values are rejected as well: they previously slipped through even though
# the error message already states that the value must be positive.
# No upper bound is enforced here.
#
# @param percent [Numeric] requested traffic share
# @raise [ArgumentError] when +percent+ is zero or negative
# @return [nil]
def assert_valid_canary_percent!(percent)
  return if percent.positive?

  raise ArgumentError, "percent must be positive — use `demote` to stop traffic"
end
|
|
116
|
+
|
|
117
|
+
# Guard: refuses to demote the effective primary version when no other
# version could take over. Non-primary versions always pass.
#
# @raise [CannotDemotePrimaryError] for a primary with no viable candidate
def assert_can_demote_primary!(prompt_class, version_name)
  is_primary = effective_primary_name(prompt_class) == version_name
  return unless is_primary && no_viable_candidate?(prompt_class, version_name)

  message = "Cannot demote #{version_name.inspect} — primary version with no viable " \
            "candidate. Promote another version first."
  raise CannotDemotePrimaryError, message
end
|
|
125
|
+
|
|
126
|
+
# Name of the version currently acting as primary.
#
# With ActiveRecord storage and a loaded PrimaryOverride model, a stored
# override wins. In every other case, including a lookup that fails because
# no connection is established or the statement is invalid, the prompt
# class's own primary version is used.
#
# @return [String]
def effective_primary_name(prompt_class)
  stored = PrimaryOverride.find_by(prompt: prompt_class.name) if ar_storage? && defined?(PromptCanary::PrimaryOverride)
  stored ? stored.version : prompt_class.primary_version.name
rescue ::ActiveRecord::ConnectionNotEstablished, ::ActiveRecord::StatementInvalid
  prompt_class.primary_version.name
end
|
|
135
|
+
|
|
136
|
+
# True when no version other than +version_name+ could serve traffic: either
# there are no other versions, or every other version is demoted (a
# RolloutOverride of 0 under ActiveRecord storage, `demoted?` otherwise).
#
# @return [Boolean]
def no_viable_candidate?(prompt_class, version_name)
  rivals = prompt_class.versions.select { |v| v.name != version_name }
  return true if rivals.empty?
  return rivals.none? { |v| !v.demoted? } unless ar_storage?

  require "prompt_canary/storage/active_record_adapter"
  rivals.all? do |rival|
    RolloutOverride.where(prompt: prompt_class.name, version: rival.name, rollout_override: 0).exists?
  end
end
|
|
149
|
+
|
|
150
|
+
# Whether the configured storage backend is :active_record.
# Yields nil (falsy) when no `configuration` method is reachable.
def ar_storage?
  return unless defined?(configuration)

  configuration.storage == :active_record
end
|
|
153
|
+
|
|
154
|
+
# Current canary percentage for a version.
#
# A stored RolloutOverride greater than zero takes precedence; otherwise the
# percentage declared on the registered version is used, defaulting to 0 when
# the version, its rollout, or the :percent key is missing.
#
# @return [Numeric]
def effective_canary_percent(prompt_class, version_name)
  stored = RolloutOverride.find_by(
    "prompt = ? AND version = ? AND rollout_override > 0",
    prompt_class.name, version_name
  )

  if stored
    stored.rollout_override
  else
    declared = prompt_class.versions.find { |v| v.name == version_name }
    rollout = declared.nil? ? nil : declared.rollout
    percent = rollout.nil? ? nil : rollout.fetch(:percent, 0)
    percent || 0
  end
end
|
|
164
|
+
|
|
165
|
+
# Persists one audit event through the PromptEvent model, stamping it with
# the current time. Does nothing when the model is not loaded.
#
# @param payload [Hash] event attributes; keys that are absent are stored as
#   nil, except :triggered_by, which defaults to "manual"
def record_event(payload)
  return unless defined?(PromptCanary::PromptEvent)

  copied = %i[
    prompt version event previous_percent new_percent previous_status new_status
    reason triggering_metric triggering_value triggering_threshold
  ]
  attributes = copied.to_h { |key| [key, payload[key]] }
  attributes[:triggered_by] = payload.fetch(:triggered_by, "manual")
  attributes[:recorded_at] = Time.now

  PromptEvent.create!(**attributes)
end
|
|
184
|
+
end
|
|
185
|
+
# rubocop:enable Metrics/ModuleLength
|
|
186
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Evaluates each version's rollback rules against recorded metrics and
# demotes versions that violate one.
class Monitor
  # @param recorder [#public_send] object answering to each rule's metric name
  #   with `prompt:`, `version:` and `over:` keywords
  def initialize(recorder:)
    @recorder = recorder
  end

  # Checks every registered version of +prompt_class+.
  #
  # Rules are evaluated in declaration order. The first violated rule demotes
  # the version and ends evaluation for that version. Without that stop, each
  # additional violated rule would issue another demotion of the same version.
  #
  # @param prompt_class [Class] prompt class exposing `name` and `versions`
  def evaluate(prompt_class)
    prompt_class.versions.each do |version|
      version.rollback_rules.each do |rule|
        value = @recorder.public_send(rule.metric,
                                      prompt: prompt_class.name,
                                      version: version.name,
                                      over: rule.window)

        next unless rule.violated_by?(value)

        PromptCanary.demote(
          prompt_class, version.name,
          triggered_by: "monitor",
          triggering_metric: rule.metric.to_s,
          triggering_value: value,
          triggering_threshold: rule.threshold
        )
        # The version is demoted now; the remaining rules are redundant.
        break
      end
    end
  end
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Background job that runs the rollback monitor over every registered prompt
# class, using the storage backend named in the gem configuration.
class MonitorJob < ActiveJob::Base
  queue_as :default

  def perform
    storage = StorageFactory.build(PromptCanary.configuration.storage)
    monitor = Monitor.new(recorder: Recorder.new(storage: storage))

    PromptCanary.registered_prompts.each do |prompt_class|
      monitor.evaluate(prompt_class)
    end
  end
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Legacy base class kept for backwards compatibility: subclassing still
# works but prints a deprecation warning pointing at the Promptable mixin.
class Prompt
  include Promptable

  class << self
    # Warns on every subclass definition, then defers to the default hook.
    def inherited(subclass)
      warn "[PromptCanary] Inheriting from PromptCanary::Prompt is deprecated. " \
           "Use `include PromptCanary::Promptable` instead."
      super
    end
  end
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Runs one prompt call end to end: picks a version, invokes the adapter,
# records the telemetry and wraps the outcome in a Result.
class PromptExecutor
  # @param prompt_class [Class] the prompt class being executed
  # @param adapter [#call, nil] defaults to the adapter named in the configuration
  # @param recorder [#record, nil] defaults to a Recorder on the configured storage
  def initialize(prompt_class:, adapter: nil, recorder: nil)
    @prompt_class = prompt_class
    @adapter = adapter || default_adapter
    @recorder = recorder || default_recorder
  end

  # @param context [Hash] routing context handed to Router.choose
  # @param args [Hash] remaining keywords, forwarded to the adapter as `args:`
  # @return [Result]
  def call(context: {}, **args)
    chosen = Router.choose(@prompt_class, context)
    outcome = @adapter.call(version: chosen, args: args)
    @recorder.record(prompt: @prompt_class.name, version: chosen, telemetry: outcome)

    text, latency_ms, tokens, error = %i[text latency_ms tokens error].map { |key| outcome[key] }
    Result.new(
      text: text,
      version_used: chosen.name,
      model: chosen.model,
      latency_ms: latency_ms,
      tokens: tokens,
      error: error,
      recorded_at: Time.now
    )
  end

  private

  # Adapter built from the global configuration.
  def default_adapter
    AdapterFactory.build(PromptCanary.configuration.adapter)
  end

  # Recorder backed by the globally configured storage.
  def default_recorder
    Recorder.new(storage: StorageFactory.build(PromptCanary.configuration.storage))
  end
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Mixin that turns a class into a prompt with named versions. Including it
# adds the class-level DSL below and registers the class with PromptCanary.
module Promptable
  def self.included(base)
    base.extend(ClassMethods)
    PromptCanary.register_prompt(base)
  end

  # Class-level API added to every including class.
  module ClassMethods
    # Declares a version; the block is evaluated against a VersionBuilder.
    #
    # @raise [DuplicateVersionError] when +name+ is already registered
    def version(name, &block)
      already_declared = versions.any? { |existing| existing.name == name }
      raise DuplicateVersionError, "Version #{name.inspect} is already registered on #{self}" if already_declared

      builder = VersionBuilder.new(name)
      builder.instance_eval(&block)
      versions << builder.build
    end

    # @return [Array] versions in declaration order
    def versions
      @versions ||= []
    end

    # The promoted version when one was set and is still registered;
    # otherwise the first declared version.
    #
    # @raise [NoPrimaryVersionError] when no versions are registered
    def primary_version
      raise NoPrimaryVersionError, "#{self} has no versions registered" if versions.empty?

      promoted = @primary_override && versions.find { |candidate| candidate.name == @primary_override }
      promoted || versions.first
    end

    # Remembers +version_name+ as the in-memory primary override.
    def promote_to_primary!(version_name)
      @primary_override = version_name
    end

    # Executes the prompt through a PromptExecutor.
    def call(context: {}, adapter: nil, recorder: nil, **args)
      executor = PromptExecutor.new(prompt_class: self, adapter: adapter, recorder: recorder)
      executor.call(context: context, **args)
    end

    # Forgets all declared versions and any primary override.
    def reset_registry!
      @versions = []
      @primary_override = nil
    end
  end
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "monitor_job"
|
|
4
|
+
|
|
5
|
+
module PromptCanary
|
|
6
|
+
# Rails integration. Each initializer defers its work to the
# :after_initialize load hook.
class Railtie < Rails::Railtie
  # Loads prompt classes from app/prompts under the application root.
  initializer "prompt_canary.load_prompt_classes" do |_app|
    ActiveSupport.on_load(:after_initialize) do
      prompts_dir = Rails.root.join("app", "prompts")
      PromptCanary.load_prompt_classes(prompts_dir.to_s)
    end
  end

  # Runs the storage configuration check, passing it the Rails logger.
  initializer "prompt_canary.check_storage_config" do
    ActiveSupport.on_load(:after_initialize) do
      PromptCanary.check_storage_config!(Rails.logger)
    end
  end

  # Logs the names of all registered prompt classes, if there are any.
  initializer "prompt_canary.log_registered_prompts" do
    ActiveSupport.on_load(:after_initialize) do
      if PromptCanary.registered_prompts.any?
        names = PromptCanary.registered_prompts.map(&:name).join(", ")
        Rails.logger.info("[PromptCanary] Registered prompts: #{names}")
      end
    end
  end
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
class Recorder
|
|
5
|
+
attr_reader :storage
|
|
6
|
+
|
|
7
|
+
def initialize(storage:)
|
|
8
|
+
@storage = storage
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def latency_p95(prompt:, version:, over:)
|
|
12
|
+
records = @storage.read_recent(prompt: prompt, version: version, limit: over)
|
|
13
|
+
return 0 if records.empty?
|
|
14
|
+
|
|
15
|
+
latencies = records.map { |r| r[:latency_ms] }.sort
|
|
16
|
+
index = (latencies.length * 0.95).ceil - 1
|
|
17
|
+
latencies[index]
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def error_rate(prompt:, version:, over:)
|
|
21
|
+
records = @storage.read_recent(prompt: prompt, version: version, limit: over)
|
|
22
|
+
return 0.0 if records.empty?
|
|
23
|
+
|
|
24
|
+
errored = records.count { |r| !r[:error].nil? }
|
|
25
|
+
(errored.to_f / records.length).round(2)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def stats(prompt:, version:, over:)
|
|
29
|
+
records = @storage.read_recent(prompt: prompt, version: version, limit: over)
|
|
30
|
+
return { call_count: 0, error_rate: 0.0, latency_p95: nil, last_called_at: nil } if records.empty?
|
|
31
|
+
|
|
32
|
+
latencies = records.map { |r| r[:latency_ms] }.compact.sort
|
|
33
|
+
error_count = records.count { |r| !r[:error].nil? }
|
|
34
|
+
p95_index = (latencies.size * 0.95).ceil - 1
|
|
35
|
+
|
|
36
|
+
{
|
|
37
|
+
call_count: records.size,
|
|
38
|
+
error_rate: (error_count.to_f / records.size).round(2),
|
|
39
|
+
latency_p95: latencies[p95_index],
|
|
40
|
+
last_called_at: records.last[:recorded_at]
|
|
41
|
+
}
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def record(prompt:, version:, telemetry:)
|
|
45
|
+
@storage.write(
|
|
46
|
+
prompt: prompt,
|
|
47
|
+
version: version.name,
|
|
48
|
+
latency_ms: telemetry[:latency_ms],
|
|
49
|
+
tokens: telemetry[:tokens],
|
|
50
|
+
error: telemetry[:error],
|
|
51
|
+
recorded_at: Time.now
|
|
52
|
+
)
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Immutable outcome of a single prompt call; the instance freezes itself
# at the end of construction.
class Result
  attr_reader :text, :version_used, :model
  attr_reader :latency_ms, :tokens, :error
  attr_reader :recorded_at

  def initialize(text:, version_used:, model:, latency_ms:, tokens:, error:, recorded_at:)
    @text, @version_used, @model = text, version_used, model
    @latency_ms, @tokens, @error = latency_ms, tokens, error
    @recorded_at = recorded_at
    freeze
  end
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
|
|
4
|
+
# Immutable rollback condition: a metric name, a threshold, the comparison
# to apply, and the window passed to the recorder when reading the metric.
class RollbackRule
  attr_reader :metric, :threshold, :comparator, :window

  def initialize(metric:, threshold:, comparator:, window:)
    @metric = metric
    @threshold = threshold
    @comparator = comparator
    @window = window
    freeze
  end

  # Whether +value+ breaks this rule.
  #
  # A nil value (no measurement available) never violates the rule; it used
  # to raise NoMethodError. An unrecognised comparator yields an explicit
  # false rather than nil, so the predicate always returns a boolean.
  #
  # @param value [Numeric, nil] observed metric value
  # @return [Boolean]
  def violated_by?(value)
    return false if value.nil?

    case comparator
    when :greater_than then value > threshold
    when :less_than then value < threshold
    else false
    end
  end
end
|
|
22
|
+
end
|