prompt_canary 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +86 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +45 -0
- data/LICENSE.txt +21 -0
- data/README.md +338 -0
- data/Rakefile +12 -0
- data/app/controllers/prompt_canary/application_controller.rb +6 -0
- data/app/controllers/prompt_canary/dashboard/prompts_controller.rb +69 -0
- data/app/views/layouts/prompt_canary/application.html.erb +42 -0
- data/app/views/prompt_canary/dashboard/prompts/index.html.erb +50 -0
- data/app/views/prompt_canary/dashboard/prompts/show.html.erb +114 -0
- data/config/routes.rb +12 -0
- data/examples/auto_rollback.rb +105 -0
- data/examples/demo.rb +83 -0
- data/exe/prompt_canary +6 -0
- data/lib/generators/prompt_canary/install_generator.rb +39 -0
- data/lib/generators/prompt_canary/templates/create_prompt_canary_calls.rb +62 -0
- data/lib/prompt_canary/adapter_factory.rb +16 -0
- data/lib/prompt_canary/adapters/anthropic.rb +39 -0
- data/lib/prompt_canary/adapters/base.rb +11 -0
- data/lib/prompt_canary/cli/commands/history.rb +63 -0
- data/lib/prompt_canary/cli/commands/status.rb +55 -0
- data/lib/prompt_canary/cli.rb +69 -0
- data/lib/prompt_canary/configuration.rb +31 -0
- data/lib/prompt_canary/deployment.rb +186 -0
- data/lib/prompt_canary/engine.rb +7 -0
- data/lib/prompt_canary/monitor.rb +30 -0
- data/lib/prompt_canary/monitor_job.rb +13 -0
- data/lib/prompt_canary/prompt.rb +13 -0
- data/lib/prompt_canary/prompt_executor.rb +27 -0
- data/lib/prompt_canary/promptable.rb +50 -0
- data/lib/prompt_canary/railtie.rb +28 -0
- data/lib/prompt_canary/recorder.rb +55 -0
- data/lib/prompt_canary/result.rb +18 -0
- data/lib/prompt_canary/rollback_rule.rb +22 -0
- data/lib/prompt_canary/router.rb +61 -0
- data/lib/prompt_canary/storage/active_record_adapter.rb +58 -0
- data/lib/prompt_canary/storage/memory.rb +21 -0
- data/lib/prompt_canary/storage/sqlite.rb +64 -0
- data/lib/prompt_canary/storage_factory.rb +24 -0
- data/lib/prompt_canary/version.rb +5 -0
- data/lib/prompt_canary/version_builder.rb +52 -0
- data/lib/prompt_canary/version_object.rb +63 -0
- data/lib/prompt_canary.rb +101 -0
- data/sig/prompt_canary.rbs +4 -0
- metadata +95 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
  module Dashboard
    # Dashboard controller for registered prompt classes.
    #
    # * +index+   lists every registered prompt with per-version stats.
    # * +show+    renders one prompt plus its most recent deployment events.
    # * +promote+ promotes the version named in +params[:version]+ and
    #   redirects back to the prompt page.
    #
    # +show+ and +promote+ respond with 404 when +params[:name]+ does not
    # match the name of any registered prompt class.
    class PromptsController < PromptCanary::ApplicationController
      # Number of deployment events loaded for the detail page.
      EVENT_LIMIT = 10

      attr_reader :prompts, :prompt

      def index
        @prompts = PromptCanary.registered_prompts.map { |klass| build_prompt_data(klass) }
      end

      def show
        klass = find_prompt_class
        return head(:not_found) unless klass

        @prompt = build_prompt_data(klass).merge(events: fetch_events(klass.name))
      end

      def promote
        klass = find_prompt_class
        return head(:not_found) unless klass

        PromptCanary.promote(klass, params[:version])
        redirect_to prompt_path(params[:name])
      end

      private

      # Looks up the registered prompt class whose name equals params[:name].
      #
      # @return [Class, nil] nil when no registered prompt matches
      def find_prompt_class
        PromptCanary.registered_prompts.find { |k| k.name == params[:name] }
      end

      # Builds the hash the views render for one prompt class.
      #
      # @param klass [Class] a registered prompt class
      # @return [Hash] +{ name:, versions: [{ name:, primary:, demoted:, active:, stats: }] }+
      def build_prompt_data(klass)
        # Resolved once per prompt instead of once per version.
        primary_name = klass.primary_version&.name

        version_rows = klass.versions.map do |v|
          is_demoted = demoted?(klass.name, v.name)
          is_primary = primary_name == v.name
          {
            name: v.name,
            primary: is_primary,
            demoted: is_demoted,
            # The primary is always active; a candidate is active only while
            # it is not demoted and has a positive rollout percentage.
            active: is_primary || (!is_demoted && v.rollout.fetch(:percent, 0).positive?),
            stats: PromptCanary.stats(klass, v.name)
          }
        end

        { name: klass.name, versions: version_rows }
      end

      # Loads the latest EVENT_LIMIT deployment events for a prompt, returned
      # oldest-first so the history table still reads chronologically.
      #
      # The query sorts newest-first before limiting; sorting ascending and
      # then limiting would pin the table to the first events ever recorded
      # and hide everything that happened afterwards.
      #
      # @param prompt_name [String]
      # @return [Array] empty when the PromptEvent model is not defined or
      #   the database cannot be queried
      def fetch_events(prompt_name)
        return [] unless defined?(PromptCanary::PromptEvent)

        PromptCanary::PromptEvent
          .where(prompt: prompt_name)
          .order(recorded_at: :desc)
          .limit(EVENT_LIMIT)
          .to_a
          .reverse
      rescue ::ActiveRecord::ConnectionNotEstablished, ::ActiveRecord::StatementInvalid
        []
      end

      # Whether a rollout override of 0 exists for the given prompt version.
      #
      # @param prompt_name [String]
      # @param version_name [String]
      # @return [Boolean] false when the RolloutOverride model is not defined
      #   or the database cannot be queried
      def demoted?(prompt_name, version_name)
        return false unless defined?(PromptCanary::RolloutOverride)

        PromptCanary::RolloutOverride
          .where(prompt: prompt_name, version: version_name, rollout_override: 0)
          .exists?
      rescue ::ActiveRecord::ConnectionNotEstablished, ::ActiveRecord::StatementInvalid
        false
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html>
|
|
3
|
+
<head>
|
|
4
|
+
<title>PromptCanary Dashboard</title>
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
6
|
+
<style>
|
|
7
|
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
8
|
+
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", monospace; font-size: 14px; background: #f5f5f5; color: #222; }
|
|
9
|
+
header { background: #1a1a2e; color: #fff; padding: 16px 24px; display: flex; align-items: center; gap: 12px; }
|
|
10
|
+
header h1 { font-size: 18px; font-weight: 600; }
|
|
11
|
+
header a { color: #aaa; text-decoration: none; font-size: 13px; }
|
|
12
|
+
header a:hover { color: #fff; }
|
|
13
|
+
main { padding: 24px; max-width: 1100px; margin: 0 auto; }
|
|
14
|
+
h2 { font-size: 16px; font-weight: 600; margin-bottom: 16px; color: #333; }
|
|
15
|
+
table { width: 100%; border-collapse: collapse; background: #fff; border-radius: 6px; overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.08); }
|
|
16
|
+
th { background: #f0f0f0; text-align: left; padding: 10px 14px; font-weight: 600; font-size: 12px; text-transform: uppercase; color: #555; border-bottom: 1px solid #ddd; }
|
|
17
|
+
td { padding: 10px 14px; border-bottom: 1px solid #eee; vertical-align: top; }
|
|
18
|
+
tr:last-child td { border-bottom: none; }
|
|
19
|
+
tr:hover td { background: #fafafa; }
|
|
20
|
+
.badge { display: inline-block; padding: 2px 8px; border-radius: 10px; font-size: 11px; font-weight: 600; }
|
|
21
|
+
.badge-primary { background: #d4edda; color: #155724; }
|
|
22
|
+
.badge-candidate { background: #fff3cd; color: #856404; }
|
|
23
|
+
.badge-error { background: #f8d7da; color: #721c24; }
|
|
24
|
+
.badge-active { background: #cce5ff; color: #004085; }
|
|
25
|
+
tr.inactive td { opacity: 0.45; }
|
|
26
|
+
a { color: #0066cc; text-decoration: none; }
|
|
27
|
+
a:hover { text-decoration: underline; }
|
|
28
|
+
.stat { font-variant-numeric: tabular-nums; }
|
|
29
|
+
.muted { color: #888; }
|
|
30
|
+
.back { margin-bottom: 16px; display: block; }
|
|
31
|
+
</style>
|
|
32
|
+
</head>
|
|
33
|
+
<body>
|
|
34
|
+
<header>
|
|
35
|
+
<h1>🐤 PromptCanary</h1>
|
|
36
|
+
<%= link_to "Dashboard", prompts_path %>
|
|
37
|
+
</header>
|
|
38
|
+
<main>
|
|
39
|
+
<%= yield %>
|
|
40
|
+
</main>
|
|
41
|
+
</body>
|
|
42
|
+
</html>
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
<h2>Registered Prompts</h2>
|
|
2
|
+
|
|
3
|
+
<% if @prompts.empty? %>
|
|
4
|
+
<p class="muted">No prompts registered. Include <code>PromptCanary::Promptable</code> in a prompt class to see it here.</p>
|
|
5
|
+
<% else %>
|
|
6
|
+
<table>
|
|
7
|
+
<thead>
|
|
8
|
+
<tr>
|
|
9
|
+
<th>Prompt</th>
|
|
10
|
+
<th>Version</th>
|
|
11
|
+
<th>Status</th>
|
|
12
|
+
<th>Calls</th>
|
|
13
|
+
<th>Error Rate</th>
|
|
14
|
+
<th>P95 Latency</th>
|
|
15
|
+
<th>Last Called</th>
|
|
16
|
+
</tr>
|
|
17
|
+
</thead>
|
|
18
|
+
<tbody>
|
|
19
|
+
<% @prompts.each do |prompt| %>
|
|
20
|
+
<% prompt[:versions].each_with_index do |v, i| %>
|
|
21
|
+
<tr class="<%= v[:active] ? '' : 'inactive' %>">
|
|
22
|
+
<% if i == 0 %>
|
|
23
|
+
<td rowspan="<%= prompt[:versions].size %>">
|
|
24
|
+
<%= link_to prompt[:name], prompt_path(prompt[:name]) %>
|
|
25
|
+
</td>
|
|
26
|
+
<% end %>
|
|
27
|
+
<td class="stat"><%= v[:name] %></td>
|
|
28
|
+
<td>
|
|
29
|
+
<% if v[:active] %>
|
|
30
|
+
<span class="badge badge-active">active</span>
|
|
31
|
+
<% end %>
|
|
32
|
+
<span class="badge <%= v[:primary] ? 'badge-primary' : 'badge-candidate' %>">
|
|
33
|
+
<%= v[:primary] ? "primary" : "candidate" %>
|
|
34
|
+
</span>
|
|
35
|
+
<% if v[:demoted] %>
|
|
36
|
+
<span class="badge badge-error">demoted</span>
|
|
37
|
+
<% end %>
|
|
38
|
+
</td>
|
|
39
|
+
<td class="stat"><%= v[:stats][:call_count] %></td>
|
|
40
|
+
<td class="stat <%= v[:stats][:error_rate].to_f > 0.05 ? 'badge-error' : '' %>">
|
|
41
|
+
<%= "%.1f%%" % (v[:stats][:error_rate].to_f * 100) %>
|
|
42
|
+
</td>
|
|
43
|
+
<td class="stat"><%= v[:stats][:latency_p95] ? "#{v[:stats][:latency_p95]}ms" : "—" %></td>
|
|
44
|
+
<td class="stat muted"><%= v[:stats][:last_called_at] ? v[:stats][:last_called_at].strftime("%b %d %H:%M") : "—" %></td>
|
|
45
|
+
</tr>
|
|
46
|
+
<% end %>
|
|
47
|
+
<% end %>
|
|
48
|
+
</tbody>
|
|
49
|
+
</table>
|
|
50
|
+
<% end %>
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
<%= link_to "← All Prompts", prompts_path, class: "back" %>
|
|
2
|
+
<h2><%= @prompt[:name] %></h2>
|
|
3
|
+
|
|
4
|
+
<table style="margin-bottom: 24px;">
|
|
5
|
+
<thead>
|
|
6
|
+
<tr>
|
|
7
|
+
<th>Version</th>
|
|
8
|
+
<th>Status</th>
|
|
9
|
+
<th>Calls</th>
|
|
10
|
+
<th>Error Rate</th>
|
|
11
|
+
<th>P95 Latency</th>
|
|
12
|
+
<th>Last Called</th>
|
|
13
|
+
<th>Actions</th>
|
|
14
|
+
</tr>
|
|
15
|
+
</thead>
|
|
16
|
+
<tbody>
|
|
17
|
+
<% @prompt[:versions].each do |v| %>
|
|
18
|
+
<tr class="<%= v[:active] ? '' : 'inactive' %>">
|
|
19
|
+
<td class="stat"><%= v[:name] %></td>
|
|
20
|
+
<td>
|
|
21
|
+
<% if v[:active] %>
|
|
22
|
+
<span class="badge badge-active">active</span>
|
|
23
|
+
<% end %>
|
|
24
|
+
<span class="badge <%= v[:primary] ? 'badge-primary' : 'badge-candidate' %>">
|
|
25
|
+
<%= v[:primary] ? "primary" : "candidate" %>
|
|
26
|
+
</span>
|
|
27
|
+
<% if v[:demoted] %>
|
|
28
|
+
<span class="badge badge-error">demoted</span>
|
|
29
|
+
<% end %>
|
|
30
|
+
</td>
|
|
31
|
+
<td class="stat"><%= v[:stats][:call_count] %></td>
|
|
32
|
+
<td class="stat <%= v[:stats][:error_rate].to_f > 0.05 ? 'badge-error' : '' %>">
|
|
33
|
+
<%= "%.1f%%" % (v[:stats][:error_rate].to_f * 100) %>
|
|
34
|
+
</td>
|
|
35
|
+
<td class="stat"><%= v[:stats][:latency_p95] ? "#{v[:stats][:latency_p95]}ms" : "—" %></td>
|
|
36
|
+
<td class="stat muted"><%= v[:stats][:last_called_at] ? v[:stats][:last_called_at].strftime("%b %d %H:%M") : "—" %></td>
|
|
37
|
+
<td>
|
|
38
|
+
<% unless v[:primary] || v[:demoted] %>
|
|
39
|
+
<%= form_with url: promote_prompt_path(@prompt[:name]), method: :post, local: true do |f| %>
|
|
40
|
+
<%= f.hidden_field :version, value: v[:name] %>
|
|
41
|
+
<%= f.submit "Promote", class: "btn" %>
|
|
42
|
+
<% end %>
|
|
43
|
+
<% end %>
|
|
44
|
+
</td>
|
|
45
|
+
</tr>
|
|
46
|
+
<% end %>
|
|
47
|
+
</tbody>
|
|
48
|
+
</table>
|
|
49
|
+
|
|
50
|
+
<% if @prompt[:events].any? %>
|
|
51
|
+
<h3>Deployment History</h3>
|
|
52
|
+
<table style="margin-bottom: 24px;">
|
|
53
|
+
<thead>
|
|
54
|
+
<tr>
|
|
55
|
+
<th>Time</th>
|
|
56
|
+
<th>Event</th>
|
|
57
|
+
<th>Version</th>
|
|
58
|
+
<th>Change</th>
|
|
59
|
+
<th>Triggered By</th>
|
|
60
|
+
<th>Reason</th>
|
|
61
|
+
</tr>
|
|
62
|
+
</thead>
|
|
63
|
+
<tbody>
|
|
64
|
+
<% @prompt[:events].each do |e| %>
|
|
65
|
+
<tr>
|
|
66
|
+
<td class="stat muted"><%= e.recorded_at.strftime("%b %d %H:%M") %></td>
|
|
67
|
+
<td><span class="badge badge-candidate"><%= e.event.upcase %></span></td>
|
|
68
|
+
<td class="stat"><%= e.version %></td>
|
|
69
|
+
<td class="stat muted">
|
|
70
|
+
<% if e.previous_status || e.new_status %>
|
|
71
|
+
<%= e.previous_status %> → <%= e.new_status %>
|
|
72
|
+
<% elsif !e.previous_percent.nil? && !e.new_percent.nil? %>
|
|
73
|
+
<%= e.previous_percent %>% → <%= e.new_percent %>%
|
|
74
|
+
<% end %>
|
|
75
|
+
</td>
|
|
76
|
+
<td class="stat"><%= e.triggered_by %></td>
|
|
77
|
+
<td class="muted"><%= e.reason %></td>
|
|
78
|
+
</tr>
|
|
79
|
+
<% end %>
|
|
80
|
+
</tbody>
|
|
81
|
+
</table>
|
|
82
|
+
<% end %>
|
|
83
|
+
|
|
84
|
+
<h2>Recent Calls</h2>
<%# Latest 50 recorded calls for this prompt, newest first. %>
<% calls = PromptCanary::Call.where(prompt: @prompt[:name]).order(recorded_at: :desc).limit(50) %>
<% if calls.empty? %>
  <p class="muted">No calls recorded yet.</p>
<% else %>
  <table>
    <thead>
      <tr>
        <th>Recorded At</th>
        <th>Version</th>
        <th>Latency</th>
        <th>Tokens In</th>
        <th>Tokens Out</th>
        <th>Error</th>
      </tr>
    </thead>
    <tbody>
      <% calls.each do |call| %>
        <%# tokens is stored as a JSON string; nil when nothing was recorded. %>
        <% tokens = call.tokens ? JSON.parse(call.tokens) : nil %>
        <tr>
          <td class="stat muted"><%= call.recorded_at.strftime("%b %d %H:%M:%S") %></td>
          <td class="stat"><%= call.version %></td>
          <td class="stat"><%= call.latency_ms ? "#{call.latency_ms}ms" : "—" %></td>
          <td class="stat"><%= tokens ? tokens["input"] : "—" %></td>
          <td class="stat"><%= tokens ? tokens["output"] : "—" %></td>
          <%# The error text is stored data, so it must be HTML-escaped: content_tag escapes its content, whereas interpolating it into an html_safe string would render any markup it contains. %>
          <td><%= call.error ? content_tag(:span, call.error, class: "badge badge-error") : "—" %></td>
        </tr>
      <% end %>
    </tbody>
  </table>
<% end %>
|
data/config/routes.rb
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Run from the repo root: bundle exec ruby examples/auto_rollback.rb
|
|
5
|
+
#
|
|
6
|
+
# Demonstrates automatic rollback: seeds synthetic errors into storage,
|
|
7
|
+
# runs the monitor, and shows the version being demoted.
|
|
8
|
+
|
|
9
|
+
$LOAD_PATH.unshift File.join(__dir__, "../lib")
|
|
10
|
+
require "prompt_canary"
|
|
11
|
+
|
|
12
|
+
# --- Configuration ---
|
|
13
|
+
|
|
14
|
+
PromptCanary.configure do |config|
  config.adapter = :anthropic
  config.storage = :memory
end

# --- Prompt definition ---

# Example prompt with two versions: "v1" has no rollout settings, "v2" is
# rolled out at 30% and carries two rollback rules (error rate and p95
# latency, each evaluated over 100 calls).
class SupportResponder
  include PromptCanary::Promptable

  version "v1" do
    model "claude-haiku-4-5-20251001"
    system "You are a helpful customer support agent."
  end

  version "v2" do
    model "claude-haiku-4-5-20251001"
    system "You are a concise customer support agent. Keep replies under 50 words."
    rollout percent: 30
    rollback_if :error_rate, greater_than: 0.05, over: 100
    rollback_if :latency_p95, greater_than: 2000, over: 100
  end
end

storage = PromptCanary::Storage::Memory.new
recorder = PromptCanary::Recorder.new(storage: storage)

# Writes 100 synthetic v2 calls straight into storage; the first
# `failure_count` of them carry a StandardError with `failure_message`.
seed_v2_calls = lambda do |failure_count, failure_message|
  100.times do |index|
    storage.write(
      prompt: "SupportResponder",
      version: "v2",
      latency_ms: rand(200..800),
      tokens: { input: 20, output: 15 },
      error: index < failure_count ? StandardError.new(failure_message) : nil,
      recorded_at: Time.now
    )
  end
end

# --- Subscribe to demotion events ---

PromptCanary.subscribe("prompt_canary.demoted") do |payload|
  puts " [notification] #{payload[:prompt]} #{payload[:version]} demoted — #{payload[:reason]}"
end

# --- Seed healthy traffic ---

puts "=== Seeding 100 calls for v2 (2% error rate — below threshold) ===\n\n"
seed_v2_calls.call(2, "timeout")

puts "Running monitor..."
PromptCanary::Monitor.new(recorder: recorder).evaluate(SupportResponder)

canary = SupportResponder.versions.find { |candidate| candidate.name == "v2" }
puts "v2 rollout percent: #{canary.rollout[:percent]}% (no change expected)\n\n"

# --- Seed failing traffic ---

puts "=== Seeding 100 more calls (12% error rate — above threshold) ===\n\n"
seed_v2_calls.call(12, "model overloaded")

puts "Running monitor..."
PromptCanary::Monitor.new(recorder: recorder).evaluate(SupportResponder)

puts "v2 rollout percent: #{canary.rollout[:percent]}% (expected 0 — demoted)\n\n"

# --- Confirm router falls back to primary ---

puts "=== Router now falls back to v1 (primary) ===\n\n"

# Stub adapter: returns a canned successful response without any API call.
adapter = Object.new
adapter.define_singleton_method(:call) do |version:, args:|
  { text: "Happy to help!", latency_ms: 250, tokens: { input: 10, output: 5 }, error: nil }
end

5.times do |call_id|
  result = SupportResponder.call(
    user_message: "Where is my order?",
    context: { call_id: call_id },
    adapter: adapter,
    recorder: recorder
  )
  puts "call_id=#{call_id} version=#{result.version_used}"
end
|
data/examples/demo.rb
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Run from the repo root: bundle exec ruby examples/demo.rb
|
|
5
|
+
#
|
|
6
|
+
# Uses a stubbed adapter by default so no API key is needed.
|
|
7
|
+
# To run against Anthropic, set ANTHROPIC_API_KEY and pass --real as an argument.
|
|
8
|
+
|
|
9
|
+
$LOAD_PATH.unshift File.join(__dir__, "../lib")
|
|
10
|
+
require "prompt_canary"
|
|
11
|
+
|
|
12
|
+
USE_REAL_API = ARGV.include?("--real")

# --- Configuration ---

PromptCanary.configure do |config|
  config.adapter = :anthropic
  config.storage = :memory
end

# --- Prompt definition ---

# Example prompt with two versions; "v2" is rolled out at 20%.
class InvoiceExtractor
  include PromptCanary::Promptable

  version "v1" do
    model "claude-haiku-4-5-20251001"
    system "Extract structured data from this invoice. Return plain text."
  end

  version "v2" do
    model "claude-haiku-4-5-20251001"
    system "Extract structured data from this invoice. Return JSON."
    rollout percent: 20
  end
end

# --- Adapter setup ---

# Either the real Anthropic adapter or a stub that answers with a canned
# response and a random latency, so the demo runs without an API key.
adapter =
  if USE_REAL_API
    puts "Using real Anthropic API (ANTHROPIC_API_KEY must be set)\n\n"
    PromptCanary::Adapters::Anthropic.new
  else
    puts "Using stubbed adapter (pass --real to hit Anthropic)\n\n"
    Object.new.tap do |stub|
      stub.define_singleton_method(:call) do |version:, args:|
        {
          text: "Vendor: Acme Corp Amount: $1,250.00 Date: 2026-01-15",
          latency_ms: rand(200..600),
          tokens: { input: 45, output: 18 },
          error: nil
        }
      end
    end
  end

# --- Demonstrate routing ---

puts "=== Routing demo (20% rollout to v2) ===\n\n"

results = Array.new(10) do |call_id|
  outcome = InvoiceExtractor.call(
    user_message: "Invoice ##{call_id + 1}: Acme Corp, $1,250.00, 2026-01-15",
    context: { call_id: call_id },
    adapter: adapter
  )
  puts "call_id=#{call_id} version=#{outcome.version_used} latency=#{outcome.latency_ms}ms"
  outcome
end

v2_count = results.count { |outcome| outcome.version_used == "v2" }
puts "\n#{v2_count}/10 calls routed to v2 (expected ~2 at 20% rollout)\n\n"

# --- Show a full result ---

puts "=== Result structure ===\n\n"
sample = results.first
puts "text: #{sample.text}"
puts "version_used: #{sample.version_used}"
puts "model: #{sample.model}"
puts "latency_ms: #{sample.latency_ms}"
puts "tokens: #{sample.tokens.inspect}"
puts "error: #{sample.error.inspect}"
puts "recorded_at: #{sample.recorded_at}"
|
data/exe/prompt_canary
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/migration"
|
|
5
|
+
|
|
6
|
+
module PromptCanary
  module Generators
    # Install generator: mounts the engine in the host app's routes, copies
    # the migration that creates the PromptCanary tables, and prints the
    # remaining manual setup steps. The public instance methods below are the
    # generator's steps and run in the order they are defined.
    class InstallGenerator < Rails::Generators::Base
      include Rails::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      # Timestamp prefix for the generated migration file (UTC, to the second).
      #
      # @param _path [String] migrations directory (unused)
      # @return [String] e.g. "20260115093000"
      def self.next_migration_number(_path)
        Time.now.utc.strftime("%Y%m%d%H%M%S")
      end

      # Adds the engine mount line to the host application's routes.
      def mount_engine
        route 'mount PromptCanary::Engine, at: "/prompt_canary"'
      end

      # Copies the migration template into db/migrate.
      #
      # Rails resolves a migration's class by camelizing its file name, so the
      # destination must be named after the class the template defines
      # (CreatePromptCanaryTables). Writing it out as
      # create_prompt_canary_calls.rb makes `rails db:migrate` look for a
      # CreatePromptCanaryCalls constant that does not exist.
      def copy_migration
        migration_template(
          "create_prompt_canary_calls.rb",
          "db/migrate/create_prompt_canary_tables.rb"
        )
      end

      # Prints the follow-up steps the user has to perform by hand.
      def show_instructions
        say "\nPromptCanary installed!", :green
        say " 1. Run: rails db:migrate"
        say " 2. Set storage: :active_record in your PromptCanary initializer"
        say " 3. Add to config/recurring.yml:"
        say " prompt_canary_monitor:"
        say " class: PromptCanary::MonitorJob"
        say " schedule: every 5 minutes\n"
      end
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Creates the four PromptCanary tables: recorded calls, per-version rollout
# overrides, per-prompt primary overrides, and deployment events.
class CreatePromptCanaryTables < ActiveRecord::Migration[7.2]
  def change
    create_calls_table
    create_rollout_overrides_table
    create_primary_overrides_table
    create_events_table
  end

  private

  # One row per recorded prompt call; indexed by prompt, version and time.
  def create_calls_table
    create_table :prompt_canary_calls do |t|
      t.string :prompt, :version, null: false
      t.integer :latency_ms
      t.text :tokens, :error
      t.datetime :recorded_at, null: false
    end

    add_index :prompt_canary_calls, [:prompt, :version, :recorded_at]
  end

  # At most one rollout override per (prompt, version) pair.
  def create_rollout_overrides_table
    create_table :prompt_canary_rollout_overrides do |t|
      t.string :prompt, :version, null: false
      t.integer :rollout_override, null: false
      t.datetime :created_at, null: false
    end

    add_index :prompt_canary_rollout_overrides, [:prompt, :version], unique: true
  end

  # At most one primary-version override per prompt.
  def create_primary_overrides_table
    create_table :prompt_canary_primary_overrides do |t|
      t.string :prompt, :version, null: false
      t.datetime :created_at, null: false
    end

    add_index :prompt_canary_primary_overrides, :prompt, unique: true
  end

  # Deployment events: percent/status transitions plus what triggered them.
  def create_events_table
    create_table :prompt_canary_events do |t|
      t.string :prompt, :version, :event, null: false
      t.integer :previous_percent, :new_percent
      t.string :previous_status, :new_status
      t.text :reason
      t.string :triggered_by, null: false
      t.string :triggering_metric
      t.float :triggering_value, :triggering_threshold
      t.datetime :recorded_at, null: false
    end

    add_index :prompt_canary_events, [:prompt, :version, :recorded_at]
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PromptCanary
  # Builds adapter instances from a configured adapter name.
  class AdapterFactory
    # Adapter name => zero-argument builder. Builders are lambdas so the
    # adapter class is only resolved when that adapter is actually requested.
    REGISTRY = {
      anthropic: -> { Adapters::Anthropic.new }
    }.freeze

    # Instantiates the adapter registered under +adapter_name+.
    #
    # @param adapter_name [Symbol, String] registry key; strings such as
    #   "anthropic" (e.g. read from ENV or YAML) are accepted as well as symbols
    # @return [Object] a new adapter instance
    # @raise [ConfigurationError] when no adapter is registered under that name
    def self.build(adapter_name)
      # nil and other values without #to_sym are looked up unchanged, so they
      # end in the ConfigurationError below rather than a NoMethodError.
      key = adapter_name.respond_to?(:to_sym) ? adapter_name.to_sym : adapter_name
      builder = REGISTRY[key]
      raise ConfigurationError, "Unknown adapter: #{adapter_name.inspect}" unless builder

      builder.call
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "anthropic"
|
|
4
|
+
|
|
5
|
+
module PromptCanary
  module Adapters
    # Adapter that executes a prompt version through the Anthropic client's
    # messages API and reports text, latency, token usage and any API error.
    class Anthropic < Base
      DEFAULT_MAX_TOKENS = 4096

      # @param client [::Anthropic::Client] client used for requests; a
      #   default client is built when none is given
      def initialize(client: ::Anthropic::Client.new)
        super()
        @client = client
      end

      # Sends one request for the given prompt version.
      #
      # @param version [#model, #system_for] prompt version to execute
      # @param args [Hash] call arguments; +:user_message+ becomes the user
      #   turn and falls back to "Generate." when absent
      # @return [Hash] +{ text:, latency_ms:, tokens:, error: }+. On an
      #   Anthropic API error the error is returned in +:error+ (not raised)
      #   and +:text+ / +:tokens+ are nil.
      def call(version:, args:)
        started_at = monotonic_now

        response = @client.messages.create(
          model: version.model,
          system_: version.system_for(args),
          max_tokens: DEFAULT_MAX_TOKENS,
          messages: [{ role: "user", content: args.fetch(:user_message, "Generate.") }]
        )

        latency_ms = elapsed_ms(started_at)

        {
          text: first_text(response),
          latency_ms: latency_ms,
          tokens: { input: response.usage.input_tokens, output: response.usage.output_tokens },
          error: nil
        }
      rescue ::Anthropic::Errors::APIError => e
        { text: nil, latency_ms: elapsed_ms(started_at), tokens: nil, error: e }
      end

      private

      # Monotonic clock reading in seconds, suitable for measuring durations.
      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end

      # Whole milliseconds elapsed since +started_at+ (a monotonic reading).
      def elapsed_ms(started_at)
        ((monotonic_now - started_at) * 1000).round
      end

      # Text of the first content block that exposes #text, or nil when the
      # response has no such block. Guards against an empty content list or a
      # leading non-text block, which would otherwise raise NoMethodError.
      def first_text(response)
        Array(response.content).find { |block| block.respond_to?(:text) }&.text
      end
    end
  end
end
|