prompt_warden 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +33 -2
- data/Gemfile +4 -4
- data/Gemfile.lock +21 -1
- data/README.md +217 -19
- data/Rakefile +19 -2
- data/bin/console +3 -3
- data/bin/pw_tail +8 -0
- data/examples/policy.yml +22 -0
- data/lib/prompt_warden/adapter.rb +59 -0
- data/lib/prompt_warden/buffer.rb +60 -0
- data/lib/prompt_warden/cli.rb +199 -0
- data/lib/prompt_warden/configuration.rb +39 -0
- data/lib/prompt_warden/cost_calculator.rb +105 -0
- data/lib/prompt_warden/event.rb +18 -0
- data/lib/prompt_warden/instrumentation/anthropic.rb +85 -0
- data/lib/prompt_warden/instrumentation/langchain.rb +76 -0
- data/lib/prompt_warden/instrumentation/openai.rb +79 -0
- data/lib/prompt_warden/policy.rb +73 -0
- data/lib/prompt_warden/railtie.rb +15 -0
- data/lib/prompt_warden/uploader.rb +93 -0
- data/lib/prompt_warden/version.rb +1 -1
- data/lib/prompt_warden.rb +32 -3
- data/prompt_warden.gemspec +33 -25
- data/spec/adapter_auto_detect_spec.rb +65 -0
- data/spec/anthropic_adapter_spec.rb +137 -0
- data/spec/buffer_spec.rb +44 -0
- data/spec/cli_spec.rb +255 -0
- data/spec/configuration_spec.rb +30 -0
- data/spec/cost_calculator_spec.rb +216 -0
- data/spec/event_spec.rb +30 -0
- data/spec/langchain_adapter_spec.rb +139 -0
- data/spec/openai_adapter_spec.rb +153 -0
- data/spec/policy_spec.rb +170 -0
- data/spec/prompt_warden_spec.rb +2 -2
- data/spec/spec_helper.rb +7 -8
- data/spec/uploader_spec.rb +79 -0
- metadata +98 -15
# frozen_string_literal: true

require 'optparse'
require 'json'
require 'time'

module PromptWarden
  module CLI
    # Renders events either as JSON or as a compact one-line terminal summary.
    class EventFormatter
      def self.format(event, json: false)
        json ? event.to_h.to_json : format_human_readable(event)
      end

      # NOTE: `private` has no effect on `def self.` singleton methods, so this
      # helper was always effectively public; the misleading marker is dropped.
      def self.format_human_readable(event)
        timestamp = event.timestamp ? Time.parse(event.timestamp).strftime('%H:%M:%S') : 'N/A'
        model = event.model || 'unknown'
        cost = event.cost_usd ? "$#{event.cost_usd}" : 'N/A'
        status = event.status || 'unknown'

        # BUG FIX: Policy#check_alerts builds symbol-keyed hashes
        # ({ type: 'regex', ... }) while events round-tripped through JSON carry
        # string keys. The original only read string keys, so live alerts always
        # fell through to the unknown branch. Accept both key forms.
        alert_text = if event.alerts&.any?
                       alerts = event.alerts.map do |alert|
                         type = alert[:type] || alert['type']
                         case type
                         when 'regex'
                           "⚠️ #{alert[:rule] || alert['rule']}"
                         when 'cost'
                           "💰 >$#{alert[:limit] || alert['limit']}"
                         else
                           "❓ #{type}"
                         end
                       end
                       " [#{alerts.join(', ')}]"
                     else
                       ""
                     end

        # Truncate the prompt so each event stays on a single line.
        prompt_preview = event.prompt ? event.prompt[0..50] : 'N/A'
        prompt_preview += '...' if event.prompt && event.prompt.length > 50

        "#{timestamp} #{model} #{cost} #{status}#{alert_text} | #{prompt_preview}"
      end
    end

    # Predicate helper deciding whether an event passes the CLI filter options.
    class EventFilter
      def self.matches?(event, options)
        return false if options[:alerts] && (!event.alerts || event.alerts.empty?)
        return false if options[:model] && event.model != options[:model]
        return false if options[:cost] && (!event.cost_usd || event.cost_usd < options[:cost])
        return false if options[:status] && event.status != options[:status]
        true
      end
    end

    # Minimal in-memory buffer used while the CLI temporarily replaces the
    # library's real upload buffer. Notifies listeners on every push.
    class CLIBuffer
      def initialize
        @events = []
        @listeners = []
      end

      def push(event)
        @events << event
        @listeners.each { |listener| listener.call(event) }
      end

      def on_event(&block)
        @listeners << block
      end

      def recent_events(limit: 10)
        @events.last(limit)
      end
    end

    # Entry point for the `pw_tail` executable: streams recorded events to the
    # terminal, optionally filtered by model / cost / status / alerts.
    class Tail
      def self.run(args = ARGV)
        options = parse_options(args)

        # Configure PromptWarden if not already configured
        configure_prompt_warden

        new(options).run
      end

      def initialize(options)
        @options = options
      end

      def run
        puts "🔍 PromptWarden CLI - Real-time event monitoring"
        puts "Press Ctrl+C to exit"
        puts "-" * 80

        @cli_buffer = CLIBuffer.new

        # Swap the library buffer for our CLI buffer.
        # BUG FIX: blocks given to define_singleton_method execute with
        # `self` == PromptWarden, so instance variables such as @cli_buffer
        # resolved against the module itself and were always nil. Capture the
        # values in locals, which the blocks close over lexically.
        @original_buffer = PromptWarden.send(:buffer)
        cli_buffer = @cli_buffer
        PromptWarden.define_singleton_method(:buffer) { cli_buffer }

        @event_count = 0
        @cli_buffer.on_event do |event|
          next unless EventFilter.matches?(event, @options)

          @event_count += 1
          puts EventFormatter.format(event, json: @options[:json])

          # Stop once the requested number of events has been printed.
          exit 0 if @options[:limit] && @event_count >= @options[:limit]
        end

        # Show recent events if not following
        unless @options[:follow]
          show_recent_events
          return
        end

        puts "Waiting for events... (use --no-follow to see recent events only)"

        # Keep the process alive until interrupted.
        loop do
          sleep 1
        end
      rescue Interrupt
        puts "\n👋 Goodbye!"
      rescue => e
        puts "❌ Error: #{e.message}"
        puts e.backtrace if ENV['DEBUG']
        exit 1
      ensure
        # Restore the original buffer (local capture, same reason as above).
        original_buffer = @original_buffer
        PromptWarden.define_singleton_method(:buffer) { original_buffer }
      end

      private

      # NOTE(review): @cli_buffer is freshly created in #run, so in --no-follow
      # mode this list is always empty — confirm the intended data source.
      def show_recent_events
        recent_events = @cli_buffer.recent_events(limit: @options[:limit] || 10)
        recent_events.each do |event|
          next unless EventFilter.matches?(event, @options)
          puts EventFormatter.format(event, json: @options[:json])
        end
      end

      # Parses pw_tail's command-line flags into an options hash.
      def self.parse_options(args)
        options = {
          alerts: false,
          model: nil,
          cost: nil,
          status: nil,
          limit: nil,
          json: false,
          follow: true
        }

        OptionParser.new do |opts|
          opts.banner = "Usage: pw_tail [options]"

          opts.on('--alerts', 'Show only events with alerts') { options[:alerts] = true }
          opts.on('--model MODEL', 'Filter by model') { |m| options[:model] = m }
          opts.on('--cost COST', Float, 'Show events above cost (USD)') { |c| options[:cost] = c }
          opts.on('--status STATUS', 'Filter by status (ok, failed)') { |s| options[:status] = s }
          opts.on('--limit N', Integer, 'Limit number of events to show') { |l| options[:limit] = l }
          opts.on('--json', 'Output in JSON format') { options[:json] = true }
          opts.on('--no-follow', 'Don\'t follow new events') { options[:follow] = false }
          opts.on('-h', '--help', 'Show this help message') do
            puts opts
            exit
          end
        end.parse!(args)

        options
      end

      # Ensures PromptWarden has a (possibly dummy) configuration so the CLI
      # can run outside an instrumented application.
      def self.configure_prompt_warden
        PromptWarden.configuration
      rescue
        # Set minimal configuration for CLI
        PromptWarden.configure do |config|
          config.project_token = 'cli_mode'
          config.api_url = 'http://localhost:3000' # Won't actually upload in CLI mode
        end
      end
    end
  end
end
# frozen_string_literal: true

require 'logger'

module PromptWarden
  # Holds all runtime settings for the PromptWarden client plus the registry
  # of user-supplied adapter hooks for custom SDKs.
  class Configuration
    attr_accessor :project_token, :flush_interval, :batch_bytes,
                  :max_retries, :logger, :api_url,
                  :price_overrides

    def initialize
      @project_token   = ENV['PROMPT_WARDEN_TOKEN']
      @flush_interval  = 1.0
      @batch_bytes     = 256 * 1024
      @max_retries     = 3
      @logger          = defined?(Rails) ? Rails.logger : Logger.new($stdout)
      @api_url         = ENV['PROMPT_WARDEN_API'] || 'https://staging.promptwarden.dev/api/v1/ingest'
      @price_overrides = {} # e.g., { "gpt-4o" => 0.005 }
      @adapter_blocks  = {} # lazily-executed registration procs
    end

    # --- validation ------------------------------------------------------

    # Raises ArgumentError when no project token is set.
    def validate!
      return unless @project_token.to_s.empty?

      raise ArgumentError, 'project_token is required'
    end

    # --- adapter registration -------------------------------------------

    # Called by the host app to hook up a custom SDK. The block runs
    # immediately when the named gem is already loaded, otherwise it is
    # stored for run_pending_adapters.
    def register_adapter(key, &block)
      @adapter_blocks[key] = block
      block.call if Gem.loaded_specs.key?(key.to_s)
    end

    # Invoked by the internal loader after all built-in auto-detects.
    def run_pending_adapters
      @adapter_blocks.each_value(&:call)
    end
  end
end
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module PromptWarden
|
6
|
+
class CostCalculator
|
7
|
+
# Model pricing (per 1K tokens) - updated as of 2024
|
8
|
+
MODEL_PRICING = {
|
9
|
+
# OpenAI Models
|
10
|
+
'gpt-4o' => { input: 0.0025, output: 0.01 },
|
11
|
+
'gpt-4o-mini' => { input: 0.00015, output: 0.0006 },
|
12
|
+
'gpt-4-turbo' => { input: 0.01, output: 0.03 },
|
13
|
+
'gpt-4' => { input: 0.03, output: 0.06 },
|
14
|
+
'gpt-3.5-turbo' => { input: 0.0005, output: 0.0015 },
|
15
|
+
|
16
|
+
# Anthropic Models
|
17
|
+
'claude-3-opus-20240229' => { input: 0.015, output: 0.075 },
|
18
|
+
'claude-3-sonnet-20240229' => { input: 0.003, output: 0.015 },
|
19
|
+
'claude-3-haiku-20240307' => { input: 0.00025, output: 0.00125 },
|
20
|
+
'claude-3-5-sonnet-20241022' => { input: 0.003, output: 0.015 },
|
21
|
+
|
22
|
+
# Default fallback
|
23
|
+
'default' => { input: 0.001, output: 0.002 }
|
24
|
+
}.freeze
|
25
|
+
|
26
|
+
class << self
|
27
|
+
def calculate_cost(prompt:, model:, response_tokens: nil)
|
28
|
+
input_tokens = count_tokens(prompt, model)
|
29
|
+
output_tokens = response_tokens || estimate_output_tokens(prompt, model)
|
30
|
+
|
31
|
+
pricing = get_model_pricing(model)
|
32
|
+
input_cost = (input_tokens / 1000.0) * pricing[:input]
|
33
|
+
output_cost = (output_tokens / 1000.0) * pricing[:output]
|
34
|
+
|
35
|
+
(input_cost + output_cost).round(6)
|
36
|
+
end
|
37
|
+
|
38
|
+
def count_tokens(text, model = nil)
|
39
|
+
return 0 if text.nil? || text.empty?
|
40
|
+
|
41
|
+
# Use tiktoken for OpenAI models if available
|
42
|
+
if model&.start_with?('gpt-') && defined?(Tiktoken)
|
43
|
+
count_tokens_tiktoken(text, model)
|
44
|
+
else
|
45
|
+
# Fallback to approximate counting
|
46
|
+
count_tokens_approximate(text)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def estimate_output_tokens(prompt, model)
|
51
|
+
return 0 if prompt.nil? || prompt.empty?
|
52
|
+
|
53
|
+
# Rough estimation based on prompt length and model
|
54
|
+
base_tokens = prompt.length / 4 # ~4 chars per token
|
55
|
+
|
56
|
+
# Adjust based on model type
|
57
|
+
case model
|
58
|
+
when /gpt-4o/
|
59
|
+
(base_tokens * 0.8).round # More efficient
|
60
|
+
when /claude-3-opus/
|
61
|
+
(base_tokens * 1.2).round # More verbose
|
62
|
+
when /claude-3-sonnet/
|
63
|
+
(base_tokens * 1.0).round # Standard
|
64
|
+
when /claude-3-haiku/
|
65
|
+
(base_tokens * 0.7).round # Concise
|
66
|
+
else
|
67
|
+
base_tokens.round
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def get_model_pricing(model)
|
72
|
+
return MODEL_PRICING['default'] unless model
|
73
|
+
|
74
|
+
# Try exact match first
|
75
|
+
return MODEL_PRICING[model] if MODEL_PRICING[model]
|
76
|
+
|
77
|
+
# Try partial matches for model variants
|
78
|
+
MODEL_PRICING.each do |key, pricing|
|
79
|
+
next if key == 'default'
|
80
|
+
return pricing if model.include?(key.split('-').first) || key.include?(model.split('-').first)
|
81
|
+
end
|
82
|
+
|
83
|
+
MODEL_PRICING['default']
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def count_tokens_tiktoken(text, model)
|
89
|
+
# Use tiktoken gem for accurate OpenAI token counting
|
90
|
+
encoding = Tiktoken.encoding_for_model(model)
|
91
|
+
encoding.encode(text).length
|
92
|
+
rescue => e
|
93
|
+
# Fallback if tiktoken fails
|
94
|
+
PromptWarden.configuration.logger.warn("Tiktoken failed for #{model}: #{e.message}")
|
95
|
+
count_tokens_approximate(text)
|
96
|
+
end
|
97
|
+
|
98
|
+
def count_tokens_approximate(text)
|
99
|
+
# Approximate token counting (roughly 4 characters per token)
|
100
|
+
# This is a simplified approximation - tiktoken is more accurate
|
101
|
+
(text.length / 4.0).round
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
# frozen_string_literal: true

require 'time'

module PromptWarden
  # Value object describing a single recorded LLM interaction.
  Event = Struct.new(
    :id, :prompt, :response, :model,
    :latency_ms, :cost_usd, :status, :rule, :timestamp, :alerts,
    keyword_init: true
  ) do
    # Hash form with serialization defaults: a UTC ISO-8601 timestamp is
    # stamped at conversion time when absent, and alerts default to [].
    def to_h
      super.tap do |h|
        h[:timestamp] ||= Time.now.utc.iso8601
        h[:alerts] ||= []
      end
    end
  end
end
# frozen_string_literal: true

require 'securerandom'

module PromptWarden
  module Instrumentation
    # Prepended into Anthropic::Client to record every #messages / #complete
    # call (prompt, model, latency, cost, policy alerts) with PromptWarden.
    module Anthropic
      def messages(params = {}, opts = {})
        _pw_wrap(:messages, params) { super }
      end

      def complete(params = {}, opts = {})
        _pw_wrap(:complete, params) { super }
      end

      private

      # Times the wrapped API call and records an event; re-raises any error
      # after recording a 'failed' event. (The unused `cfg` local from the
      # original has been removed.)
      def _pw_wrap(_method_sym, params)
        prompt = _extract_prompt(params)
        model = params[:model] || params['model']

        # Pre-flight estimate used only for policy checks.
        cost_estimate = PromptWarden.calculate_cost(prompt: prompt, model: model)
        alerts = PromptWarden::Policy.instance.check_alerts(prompt: prompt, cost_estimate: cost_estimate)

        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        resp = yield
        latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round

        # Prefer the provider-reported output token count when present.
        # NOTE(review): assumes `resp` is hash-like and responds to #dig —
        # confirm for streaming / typed-response client versions.
        response_content = _extract_response(resp)
        response_tokens = resp.dig('usage', 'output_tokens') || resp.dig(:usage, :output_tokens)
        actual_cost = PromptWarden.calculate_cost(
          prompt: prompt,
          model: model,
          response_tokens: response_tokens
        )

        PromptWarden.record(
          id: SecureRandom.uuid,
          prompt: prompt,
          response: response_content,
          model: model,
          latency_ms: latency,
          cost_usd: actual_cost,
          status: 'ok',
          alerts: alerts
        )
        resp
      rescue StandardError
        # `model`/`prompt`/`alerts` may still be nil here if the failure
        # happened before those locals were assigned.
        PromptWarden.record(
          id: SecureRandom.uuid,
          model: model,
          prompt: prompt,
          status: 'failed',
          alerts: alerts
        )
        raise
      end

      # Joins chat message contents, or falls back to a plain :prompt param.
      def _extract_prompt(params)
        if params[:messages]
          params[:messages].map { |m| m[:content] || m['content'] }.join("\n")
        else
          params[:prompt] || params['prompt'] || ''
        end
      end

      # Joins content-block texts (messages API) or returns the legacy
      # completion string; '' when neither is present.
      def _extract_response(resp)
        if resp[:content]
          resp[:content].map { |c| c[:text] || c['text'] }.join("\n")
        elsif resp['content']
          resp['content'].map { |c| c[:text] || c['text'] }.join("\n")
        else
          resp['completion'] || resp[:completion] || ''
        end
      end
    end
  end
end

Anthropic::Client.prepend(PromptWarden::Instrumentation::Anthropic) if defined?(Anthropic::Client)
# frozen_string_literal: true

require 'securerandom'

module PromptWarden
  module Instrumentation
    # Prepended into Langchain::LLM::Base to record every #chat / #complete
    # call (prompt, model, latency, cost, policy alerts) with PromptWarden.
    module Langchain
      def chat(prompt, **opts)
        _pw_wrap(:chat, { prompt: prompt, **opts }) { super }
      end

      def complete(prompt, **opts)
        _pw_wrap(:complete, { prompt: prompt, **opts }) { super }
      end

      private

      # Times the wrapped call and records an event; re-raises any error
      # after recording a 'failed' event. (The unused `cfg` local from the
      # original has been removed.)
      def _pw_wrap(method_sym, params)
        prompt = params[:prompt] || params['prompt']
        model = _extract_model(params)

        cost_estimate = PromptWarden.calculate_cost(prompt: prompt, model: model)
        alerts = PromptWarden::Policy.instance.check_alerts(prompt: prompt, cost_estimate: cost_estimate)

        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        resp = yield
        latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round

        # Langchain responses carry no token usage, so the pre-flight estimate
        # is also the final cost — the original recomputed calculate_cost with
        # identical arguments; reuse the value instead.
        actual_cost = cost_estimate

        PromptWarden.record(
          id: SecureRandom.uuid,
          prompt: prompt,
          response: resp.is_a?(String) ? resp : resp.to_s,
          model: model,
          latency_ms: latency,
          cost_usd: actual_cost,
          status: 'ok',
          alerts: alerts
        )
        resp
      rescue StandardError
        # `model`/`prompt`/`alerts` may still be nil here if the failure
        # happened before those locals were assigned.
        PromptWarden.record(
          id: SecureRandom.uuid,
          model: model,
          prompt: prompt,
          status: 'failed',
          alerts: alerts
        )
        raise
      end

      # Best-effort model identification: explicit param, else the LLM class
      # name, normalised from "Langchain::LLM::Foo" to "foo".
      # NOTE(review): the bare `name` fallback only runs for anonymous classes
      # and assumes the instance responds to #name — confirm.
      def _extract_model(params)
        model = params[:model] ||
                params['model'] ||
                self.class.name ||
                name ||
                'default'

        model = model.gsub(/^Langchain::LLM::/, '').downcase if model.include?('Langchain::LLM::')
        model
      end
    end
  end
end

if defined?(Langchain::LLM::Base)
  Langchain::LLM::Base.prepend(PromptWarden::Instrumentation::Langchain)
end
# frozen_string_literal: true

require 'securerandom'
require 'time'

module PromptWarden
  module Instrumentation
    # Prepended into OpenAI::Client to record every #chat / #completions call
    # (prompt, model, latency, cost, policy alerts) with PromptWarden.
    module OpenAI
      # --- wrappers ------------------------------------------------------
      def chat(params = {}, opts = {})
        _pw_wrap(:chat, params) { super }
      end

      def completions(params = {}, opts = {})
        _pw_wrap(:completions, params) { super }
      end

      private

      # Times the wrapped call and records an event; re-raises any error
      # after recording a 'failed' event. (The unused `cfg` local from the
      # original has been removed.)
      def _pw_wrap(_method_sym, params)
        prompt = _extract_prompt(params)
        model = params[:model]

        # Pre-flight estimate used only for policy checks.
        cost_estimate = PromptWarden.calculate_cost(prompt: prompt, model: model)
        alerts = PromptWarden::Policy.instance.check_alerts(prompt: prompt, cost_estimate: cost_estimate)

        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        resp = yield
        latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round

        # Prefer the provider-reported completion token count when present.
        # NOTE(review): assumes `resp` is hash-like and responds to #dig —
        # confirm for streaming responses.
        response_content = resp.dig('choices', 0, 'message', 'content')
        response_tokens = resp.dig('usage', 'completion_tokens')
        actual_cost = PromptWarden.calculate_cost(
          prompt: prompt,
          model: model,
          response_tokens: response_tokens
        )

        PromptWarden.record(
          id: SecureRandom.uuid,
          prompt: prompt,
          response: response_content,
          model: model,
          latency_ms: latency,
          cost_usd: actual_cost,
          status: 'ok',
          alerts: alerts
        )
        resp
      rescue StandardError
        # Uses the `model` local (same value as params[:model]) for
        # consistency with the success path; it may be nil if the failure
        # happened before assignment.
        PromptWarden.record(
          id: SecureRandom.uuid,
          model: model,
          prompt: prompt,
          status: 'failed',
          alerts: alerts
        )
        raise
      end

      # Joins chat message contents, or falls back to a plain :prompt param.
      def _extract_prompt(params)
        if params[:messages]
          params[:messages].map { |m| m[:content] }.join("\n")
        else
          params[:prompt].to_s
        end
      end
    end
  end
end

if defined?(OpenAI::Client)
  OpenAI::Client.prepend(PromptWarden::Instrumentation::OpenAI)
end
# frozen_string_literal: true

require 'yaml'
require 'singleton'

module PromptWarden
  # Raised when a prompt or its estimated cost violates a blocking rule.
  class PolicyError < StandardError; end

  # Loads rule definitions from a YAML policy file and evaluates prompts
  # against them. Singleton: access via Policy.instance.
  class Policy
    include Singleton

    def initialize
      # DRY fix: the original duplicated the file-loading logic here and in
      # #reload!; delegate to the single implementation.
      reload!
    end

    # (Re-)reads the policy file from disk. Missing file => no rules.
    def reload!
      path = ENV['PROMPT_WARDEN_POLICY'] || 'config/promptwarden.yml'
      # NOTE(review): YAML.load_file runs the full loader; switch to
      # YAML.safe_load_file if the policy file can come from an untrusted
      # source.
      rules = File.exist?(path) ? YAML.load_file(path) : {}
      # `|| {}` guards against an empty YAML file, which parses to nil and
      # would crash transform_keys in the original.
      @rules = (rules || {}).transform_keys(&:to_s)
    end

    # Builds a Regexp from either "/pattern/flags" syntax (flags: i, m, x)
    # or a bare pattern string.
    def parse_regex(str)
      if str.is_a?(String) && str =~ %r{^/(.*?)/([imx]*)$}
        pattern = ::Regexp.last_match(1)
        flags = ::Regexp.last_match(2)
        options = 0
        options |= Regexp::IGNORECASE if flags.include?('i')
        options |= Regexp::MULTILINE if flags.include?('m')
        options |= Regexp::EXTENDED if flags.include?('x')
        Regexp.new(pattern, options)
      else
        Regexp.new(str.to_s)
      end
    end

    # Non-raising check: returns an array of alert hashes (possibly empty).
    def check_alerts(prompt:, cost_estimate:)
      alerts = []

      # Check warn_if_regex patterns (prompt.to_s guards against nil prompts).
      Array(@rules['warn_if_regex']).each do |regex|
        r = parse_regex(regex)
        alerts << { type: 'regex', rule: regex, level: 'warn' } if prompt.to_s.match?(r)
      end

      # Check cost limits
      max_cost = @rules['max_cost_usd'].to_f
      if max_cost.positive? && cost_estimate > max_cost
        alerts << { type: 'cost', limit: max_cost, level: 'block' }
      end

      alerts
    end

    # Raising check: PolicyError on any violation; :ok otherwise.
    def check!(prompt:, cost_estimate:)
      max_cost = @rules['max_cost_usd'].to_f
      if max_cost.positive? && cost_estimate > max_cost
        raise PolicyError, "Cost #{cost_estimate}$ exceeds max_cost_usd #{max_cost}"
      end

      Array(@rules['reject_if_regex']).each do |regex|
        r = parse_regex(regex)
        raise PolicyError, "Prompt matches reject regex #{regex}" if prompt.to_s.match?(r)
      end

      :ok
    end
  end
end
# frozen_string_literal: true

require 'rails/railtie'

module PromptWarden
  # Hooks PromptWarden into the Rails boot process. Both hooks below are
  # currently empty placeholders: the initializer registers under the name
  # 'prompt_warden.configure' but performs no work yet, and no rake tasks
  # are defined.
  class Railtie < Rails::Railtie
    initializer 'prompt_warden.configure' do
      # Create `config/initializers/prompt_warden.rb` when user runs generator
    end

    rake_tasks do
      # Add future rake tasks here
    end
  end
end