RubyGems - prompt_warden - Versions diffs - 0.1.0 → 0.1.1 - Mend

prompt_warden 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

checksums.yaml +4 -4
data/.gitignore +3 -0
data/CHANGELOG.md +33 -2
data/Gemfile +4 -4
data/Gemfile.lock +21 -1
data/README.md +217 -19
data/Rakefile +19 -2
data/bin/console +3 -3
data/bin/pw_tail +8 -0
data/examples/policy.yml +22 -0
data/lib/prompt_warden/adapter.rb +59 -0
data/lib/prompt_warden/buffer.rb +60 -0
data/lib/prompt_warden/cli.rb +199 -0
data/lib/prompt_warden/configuration.rb +39 -0
data/lib/prompt_warden/cost_calculator.rb +105 -0
data/lib/prompt_warden/event.rb +18 -0
data/lib/prompt_warden/instrumentation/anthropic.rb +85 -0
data/lib/prompt_warden/instrumentation/langchain.rb +76 -0
data/lib/prompt_warden/instrumentation/openai.rb +79 -0
data/lib/prompt_warden/policy.rb +73 -0
data/lib/prompt_warden/railtie.rb +15 -0
data/lib/prompt_warden/uploader.rb +93 -0
data/lib/prompt_warden/version.rb +1 -1
data/lib/prompt_warden.rb +32 -3
data/prompt_warden.gemspec +33 -25
data/spec/adapter_auto_detect_spec.rb +65 -0
data/spec/anthropic_adapter_spec.rb +137 -0
data/spec/buffer_spec.rb +44 -0
data/spec/cli_spec.rb +255 -0
data/spec/configuration_spec.rb +30 -0
data/spec/cost_calculator_spec.rb +216 -0
data/spec/event_spec.rb +30 -0
data/spec/langchain_adapter_spec.rb +139 -0
data/spec/openai_adapter_spec.rb +153 -0
data/spec/policy_spec.rb +170 -0
data/spec/prompt_warden_spec.rb +2 -2
data/spec/spec_helper.rb +7 -8
data/spec/uploader_spec.rb +79 -0
metadata +98 -15

data/spec/cli_spec.rb ADDED Viewed

@@ -0,0 +1,255 @@
+# frozen_string_literal: true
+require 'time'
+RSpec.describe PromptWarden::CLI do
+  describe PromptWarden::CLI::EventFormatter do
+    let(:event) do
+      PromptWarden::Event.new(
+        id: 'test-id',
+        prompt: 'What is the ETA for this project?',
+        response: 'The ETA is 2 weeks',
+        model: 'gpt-4o',
+        latency_ms: 1250,
+        cost_usd: 0.005,
+        status: 'ok',
+        timestamp: '2024-01-15T10:30:00Z',
+        alerts: [
+          { 'type' => 'regex', 'rule' => '/ETA/i', 'level' => 'warn' }
+        ]
+      )
+    end
+    describe '.format' do
+      it 'formats events in human readable format by default' do
+        output = described_class.format(event)
+        expect(output).to include('10:30:00')
+        expect(output).to include('gpt-4o')
+        expect(output).to include('$0.005')
+        expect(output).to include('ok')
+        expect(output).to include('⚠️ /ETA/i')
+        expect(output).to include('What is the ETA for this project?')
+      end
+      it 'formats events in JSON format when requested' do
+        output = described_class.format(event, json: true)
+        parsed = JSON.parse(output)
+        expect(parsed['id']).to eq('test-id')
+        expect(parsed['model']).to eq('gpt-4o')
+        expect(parsed['alerts']).to eq([{ 'type' => 'regex', 'rule' => '/ETA/i', 'level' => 'warn' }])
+      end
+      it 'handles events with cost alerts' do
+        cost_event = PromptWarden::Event.new(
+          model: 'gpt-4o',
+          cost_usd: 0.75,
+          alerts: [{ 'type' => 'cost', 'limit' => 0.50, 'level' => 'block' }]
+        )
+        output = described_class.format(cost_event)
+        expect(output).to include('💰 >$0.5')
+      end
+      it 'handles events with multiple alerts' do
+        multi_event = PromptWarden::Event.new(
+          model: 'gpt-4o',
+          alerts: [
+            { 'type' => 'regex', 'rule' => '/ETA/i', 'level' => 'warn' },
+            { 'type' => 'regex', 'rule' => '/urgent/i', 'level' => 'warn' }
+          ]
+        )
+        output = described_class.format(multi_event)
+        expect(output).to include('⚠️ /ETA/i')
+        expect(output).to include('⚠️ /urgent/i')
+      end
+      it 'handles events with no alerts' do
+        no_alert_event = PromptWarden::Event.new(
+          model: 'gpt-4o',
+          alerts: []
+        )
+        output = described_class.format(no_alert_event)
+        expect(output).not_to include('⚠️')
+        expect(output).not_to include('💰')
+      end
+      it 'truncates long prompts' do
+        long_prompt = 'A' * 100
+        long_event = PromptWarden::Event.new(
+          model: 'gpt-4o',
+          prompt: long_prompt
+        )
+        output = described_class.format(long_event)
+        expect(output).to include('A' * 50 + '...')
+      end
+      it 'handles missing fields gracefully' do
+        minimal_event = PromptWarden::Event.new
+        output = described_class.format(minimal_event)
+        expect(output).to include('unknown')
+        expect(output).to include('N/A')
+      end
+    end
+  end
+  describe PromptWarden::CLI::EventFilter do
+    let(:event) do
+      PromptWarden::Event.new(
+        model: 'gpt-4o',
+        cost_usd: 0.005,
+        status: 'ok',
+        alerts: [{ 'type' => 'regex', 'rule' => '/ETA/i', 'level' => 'warn' }]
+      )
+    end
+    describe '.matches?' do
+      it 'matches events with no filters' do
+        expect(described_class.matches?(event, {})).to be true
+      end
+      it 'filters by alerts' do
+        expect(described_class.matches?(event, { alerts: true })).to be true
+        no_alert_event = PromptWarden::Event.new(alerts: [])
+        expect(described_class.matches?(no_alert_event, { alerts: true })).to be false
+      end
+      it 'filters by model' do
+        expect(described_class.matches?(event, { model: 'gpt-4o' })).to be true
+        expect(described_class.matches?(event, { model: 'claude-3' })).to be false
+      end
+      it 'filters by cost' do
+        expect(described_class.matches?(event, { cost: 0.01 })).to be false  # 0.005 < 0.01
+        expect(described_class.matches?(event, { cost: 0.001 })).to be true  # 0.005 > 0.001
+      end
+      it 'filters by status' do
+        expect(described_class.matches?(event, { status: 'ok' })).to be true
+        expect(described_class.matches?(event, { status: 'failed' })).to be false
+      end
+      it 'combines multiple filters' do
+        expect(described_class.matches?(event, {
+          alerts: true,
+          model: 'gpt-4o',
+          status: 'ok'
+        })).to be true
+        expect(described_class.matches?(event, {
+          alerts: true,
+          model: 'claude-3'  # Wrong model
+        })).to be false
+      end
+      it 'handles events with missing cost' do
+        no_cost_event = PromptWarden::Event.new(model: 'gpt-4o')
+        expect(described_class.matches?(no_cost_event, { cost: 0.01 })).to be false
+      end
+    end
+  end
+  describe PromptWarden::CLI::CLIBuffer do
+    let(:buffer) { described_class.new }
+    let(:event) { PromptWarden::Event.new(id: 'test-id') }
+    describe '#push' do
+      it 'stores events' do
+        buffer.push(event)
+        expect(buffer.recent_events).to include(event)
+      end
+      it 'calls listeners when events are pushed' do
+        listener_called = false
+        buffer.on_event { listener_called = true }
+        buffer.push(event)
+        expect(listener_called).to be true
+      end
+      it 'calls multiple listeners' do
+        calls = []
+        buffer.on_event { calls << 1 }
+        buffer.on_event { calls << 2 }
+        buffer.push(event)
+        expect(calls).to eq([1, 2])
+      end
+    end
+    describe '#recent_events' do
+      it 'returns recent events with limit' do
+        5.times { |i| buffer.push(PromptWarden::Event.new(id: "event-#{i}")) }
+        recent = buffer.recent_events(limit: 3)
+        expect(recent.length).to eq(3)
+        expect(recent.last.id).to eq('event-4')
+      end
+      it 'returns all events if limit exceeds count' do
+        3.times { |i| buffer.push(PromptWarden::Event.new(id: "event-#{i}")) }
+        recent = buffer.recent_events(limit: 10)
+        expect(recent.length).to eq(3)
+      end
+    end
+  end
+  describe PromptWarden::CLI::Tail do
+    describe '.parse_options' do
+      it 'parses no arguments' do
+        options = described_class.parse_options([])
+        expect(options[:alerts]).to be false
+        expect(options[:follow]).to be true
+        expect(options[:json]).to be false
+      end
+      it 'parses --alerts flag' do
+        options = described_class.parse_options(['--alerts'])
+        expect(options[:alerts]).to be true
+      end
+      it 'parses --model argument' do
+        options = described_class.parse_options(['--model', 'gpt-4o'])
+        expect(options[:model]).to eq('gpt-4o')
+      end
+      it 'parses --cost argument' do
+        options = described_class.parse_options(['--cost', '0.50'])
+        expect(options[:cost]).to eq(0.50)
+      end
+      it 'parses --status argument' do
+        options = described_class.parse_options(['--status', 'failed'])
+        expect(options[:status]).to eq('failed')
+      end
+      it 'parses --limit argument' do
+        options = described_class.parse_options(['--limit', '5'])
+        expect(options[:limit]).to eq(5)
+      end
+      it 'parses --json flag' do
+        options = described_class.parse_options(['--json'])
+        expect(options[:json]).to be true
+      end
+      it 'parses --no-follow flag' do
+        options = described_class.parse_options(['--no-follow'])
+        expect(options[:follow]).to be false
+      end
+    end
+    describe '.configure_prompt_warden' do
+      it 'does not override existing configuration' do
+        PromptWarden.configure do |config|
+          config.project_token = 'existing-token'
+        end
+        described_class.configure_prompt_warden
+        expect(PromptWarden.configuration.project_token).to eq('existing-token')
+      end
+      it 'sets CLI configuration when no existing config' do
+        # Test that the method doesn't raise errors
+        expect { described_class.configure_prompt_warden }.not_to raise_error
+      end
+    end
+  end
+end

data/spec/configuration_spec.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+RSpec.describe PromptWarden::Configuration do
+  subject(:config) { described_class.new }
+  it 'defaults flush_interval to 1 second' do
+    expect(config.flush_interval).to eq 1.0
+  end
+  it 'pulls project token from ENV by default' do
+    with_env('PROMPT_WARDEN_TOKEN' => 'env_tok') do
+      expect(described_class.new.project_token).to eq 'env_tok'
+    end
+  end
+  it 'raises if project_token missing after configure' do
+    expect do
+      PromptWarden.configure { |c| c.project_token = nil }
+    end.to raise_error(ArgumentError)
+  end
+  # helper
+  def with_env(env)
+    old = ENV.to_h.slice(*env.keys)
+    env.each { |k, v| ENV[k] = v }
+    yield
+  ensure
+    old.each { |k, v| ENV[k] = v }
+  end
+end

data/spec/cost_calculator_spec.rb ADDED Viewed

@@ -0,0 +1,216 @@
+# frozen_string_literal: true
+RSpec.describe PromptWarden::CostCalculator do
+  describe '.calculate_cost' do
+    it 'calculates cost for OpenAI models' do
+      cost = described_class.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'gpt-4o'
+      )
+      expect(cost).to be > 0
+      expect(cost).to be < 0.01  # Should be very small for short prompt
+    end
+    it 'calculates cost for Anthropic models' do
+      cost = described_class.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'claude-3-sonnet-20240229'
+      )
+      expect(cost).to be > 0
+      expect(cost).to be < 0.01
+    end
+    it 'uses response tokens when provided' do
+      cost = described_class.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'gpt-4o',
+        response_tokens: 100
+      )
+      expect(cost).to be > 0
+    end
+    it 'handles nil prompt' do
+      cost = described_class.calculate_cost(
+        prompt: nil,
+        model: 'gpt-4o'
+      )
+      expect(cost).to eq(0)
+    end
+    it 'handles empty prompt' do
+      cost = described_class.calculate_cost(
+        prompt: '',
+        model: 'gpt-4o'
+      )
+      expect(cost).to eq(0)
+    end
+    it 'uses default pricing for unknown models' do
+      cost = described_class.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'unknown-model'
+      )
+      expect(cost).to be > 0
+    end
+  end
+  describe '.count_tokens' do
+    it 'counts tokens for text' do
+      count = described_class.count_tokens('Hello, world!')
+      expect(count).to be > 0
+    end
+    it 'returns 0 for nil text' do
+      count = described_class.count_tokens(nil)
+      expect(count).to eq(0)
+    end
+    it 'returns 0 for empty text' do
+      count = described_class.count_tokens('')
+      expect(count).to eq(0)
+    end
+    it 'uses approximate counting for non-OpenAI models' do
+      count = described_class.count_tokens('Hello, world!', 'claude-3-sonnet')
+      expect(count).to be > 0
+    end
+    context 'with tiktoken available' do
+      before do
+        # Mock tiktoken if not available
+        unless defined?(Tiktoken)
+          stub_const('Tiktoken', Class.new)
+          allow(Tiktoken).to receive(:encoding_for_model).and_return(
+            double('encoding', encode: [1, 2, 3, 4, 5])
+          )
+        end
+      end
+      it 'uses tiktoken for OpenAI models' do
+        count = described_class.count_tokens('Hello, world!', 'gpt-4o')
+        expect(count).to eq(5)
+      end
+      it 'falls back to approximate counting if tiktoken fails' do
+        allow(Tiktoken).to receive(:encoding_for_model).and_raise(StandardError.new('tiktoken error'))
+        count = described_class.count_tokens('Hello, world!', 'gpt-4o')
+        expect(count).to be > 0
+      end
+    end
+  end
+  describe '.estimate_output_tokens' do
+    it 'estimates output tokens based on prompt length' do
+      tokens = described_class.estimate_output_tokens('Hello, world!', 'gpt-4o')
+      expect(tokens).to be > 0
+    end
+    it 'adjusts estimation based on model type' do
+      short_prompt = 'Hi'
+      gpt4o_tokens = described_class.estimate_output_tokens(short_prompt, 'gpt-4o')
+      claude_opus_tokens = described_class.estimate_output_tokens(short_prompt, 'claude-3-opus-20240229')
+      claude_haiku_tokens = described_class.estimate_output_tokens(short_prompt, 'claude-3-haiku-20240307')
+      # Claude Opus should estimate more tokens (more verbose)
+      expect(claude_opus_tokens).to be >= gpt4o_tokens
+      # Claude Haiku should estimate fewer tokens (more concise)
+      expect(claude_haiku_tokens).to be <= gpt4o_tokens
+    end
+    it 'handles nil model' do
+      tokens = described_class.estimate_output_tokens('Hello, world!', nil)
+      expect(tokens).to be > 0
+    end
+  end
+  describe '.get_model_pricing' do
+    it 'returns exact match pricing' do
+      pricing = described_class.get_model_pricing('gpt-4o')
+      expect(pricing).to eq({ input: 0.0025, output: 0.01 })
+    end
+    it 'returns default pricing for unknown models' do
+      pricing = described_class.get_model_pricing('unknown-model')
+      expect(pricing).to eq({ input: 0.001, output: 0.002 })
+    end
+    it 'handles nil model' do
+      pricing = described_class.get_model_pricing(nil)
+      expect(pricing).to eq({ input: 0.001, output: 0.002 })
+    end
+    it 'finds partial matches for model variants' do
+      # Test that it can find pricing for model variants
+      pricing = described_class.get_model_pricing('gpt-4o-mini')
+      expect(pricing).to eq({ input: 0.00015, output: 0.0006 })
+    end
+  end
+  describe 'MODEL_PRICING' do
+    it 'includes major OpenAI models' do
+      expect(described_class::MODEL_PRICING).to include('gpt-4o')
+      expect(described_class::MODEL_PRICING).to include('gpt-4o-mini')
+      expect(described_class::MODEL_PRICING).to include('gpt-3.5-turbo')
+    end
+    it 'includes major Anthropic models' do
+      expect(described_class::MODEL_PRICING).to include('claude-3-opus-20240229')
+      expect(described_class::MODEL_PRICING).to include('claude-3-sonnet-20240229')
+      expect(described_class::MODEL_PRICING).to include('claude-3-haiku-20240307')
+    end
+    it 'includes default pricing' do
+      expect(described_class::MODEL_PRICING).to include('default')
+    end
+    it 'has correct pricing structure' do
+      described_class::MODEL_PRICING.each do |model, pricing|
+        expect(pricing).to have_key(:input)
+        expect(pricing).to have_key(:output)
+        expect(pricing[:input]).to be > 0
+        expect(pricing[:output]).to be > 0
+      end
+    end
+  end
+  describe 'integration with PromptWarden' do
+    it 'can be called through PromptWarden.calculate_cost' do
+      cost = PromptWarden.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'gpt-4o'
+      )
+      expect(cost).to be > 0
+    end
+    it 'handles different prompt lengths' do
+      short_cost = PromptWarden.calculate_cost(
+        prompt: 'Hi',
+        model: 'gpt-4o'
+      )
+      long_cost = PromptWarden.calculate_cost(
+        prompt: 'This is a much longer prompt that should cost more to process',
+        model: 'gpt-4o'
+      )
+      expect(long_cost).to be > short_cost
+    end
+    it 'calculates different costs for different models' do
+      gpt4o_cost = PromptWarden.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'gpt-4o'
+      )
+      claude_cost = PromptWarden.calculate_cost(
+        prompt: 'Hello, world!',
+        model: 'claude-3-opus-20240229'
+      )
+      # Different models should have different costs
+      expect(gpt4o_cost).not_to eq(claude_cost)
+    end
+  end
+end

data/spec/event_spec.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+RSpec.describe PromptWarden::Event do
+  it 'serialises to hash with timestamp' do
+    evt = described_class.new(prompt: 'Hi', response: 'Hello')
+    expect(evt.to_h).to include(:timestamp)
+  end
+  it 'includes alerts field in serialization' do
+    evt = described_class.new(prompt: 'Hi', response: 'Hello')
+    expect(evt.to_h).to include(:alerts)
+    expect(evt.to_h[:alerts]).to eq []
+  end
+  it 'preserves alerts when provided' do
+    alerts = [{ type: 'regex', rule: '/ETA/i', level: 'warn' }]
+    evt = described_class.new(
+      prompt: 'Hi',
+      response: 'Hello',
+      alerts: alerts
+    )
+    expect(evt.to_h[:alerts]).to eq alerts
+  end
+  it 'defaults alerts to empty array when not provided' do
+    evt = described_class.new(prompt: 'Hi', response: 'Hello')
+    expect(evt.alerts).to eq nil
+    expect(evt.to_h[:alerts]).to eq []
+  end
+end

data/spec/langchain_adapter_spec.rb ADDED Viewed

@@ -0,0 +1,139 @@
+# frozen_string_literal: true
+require_relative '../lib/prompt_warden/instrumentation/langchain'
+RSpec.describe 'PromptWarden Langchain adapter' do
+  before do
+    require 'prompt_warden'
+    PromptWarden.configure { |c| c.project_token = 'tok' }
+    allow(PromptWarden).to receive(:record)
+    allow(PromptWarden::Policy.instance).to receive(:check_alerts).and_return([])
+    allow(PromptWarden).to receive(:calculate_cost).and_return(0.005)
+  end
+  it 'records prompt usage via instrumentation for chat method' do
+    client = Object.new
+    client.extend(PromptWarden::Instrumentation::Langchain)
+    # Call the instrumentation method directly
+    client.send(:_pw_wrap, :chat, {
+      prompt: 'hi',
+      model: 'gpt-4o'
+    }) do
+      'langchain reply'
+    end
+    expect(PromptWarden).to have_received(:record).with(hash_including(
+      prompt: 'hi',
+      model: 'gpt-4o',
+      cost_usd: 0.005,
+      status: 'ok',
+      alerts: []
+    ))
+  end
+  it 'records prompt usage via instrumentation for complete method' do
+    client = Object.new
+    client.extend(PromptWarden::Instrumentation::Langchain)
+    # Call the instrumentation method directly
+    client.send(:_pw_wrap, :complete, {
+      prompt: 'hello',
+      model: 'claude-3-sonnet'
+    }) do
+      'langchain completion'
+    end
+    expect(PromptWarden).to have_received(:record).with(hash_including(
+      prompt: 'hello',
+      model: 'claude-3-sonnet',
+      cost_usd: 0.005,
+      status: 'ok',
+      alerts: []
+    ))
+  end
+  it 'includes alerts when policy alerts are detected' do
+    alerts = [{ type: 'regex', rule: '/confidential/i', level: 'warn' }]
+    allow(PromptWarden::Policy.instance).to receive(:check_alerts).and_return(alerts)
+    client = Object.new
+    client.extend(PromptWarden::Instrumentation::Langchain)
+    # Call the instrumentation method directly
+    client.send(:_pw_wrap, :chat, {
+      prompt: 'This is confidential information',
+      model: 'gpt-4o'
+    }) do
+      'langchain reply'
+    end
+    expect(PromptWarden).to have_received(:record).with(hash_including(
+      prompt: 'This is confidential information',
+      model: 'gpt-4o',
+      status: 'ok',
+      alerts: alerts
+    ))
+  end
+  it 'records failed status on error with alerts' do
+    alerts = [{ type: 'regex', rule: '/ETA/i', level: 'warn' }]
+    allow(PromptWarden::Policy.instance).to receive(:check_alerts).and_return(alerts)
+    client = Object.new
+    client.extend(PromptWarden::Instrumentation::Langchain)
+    expect {
+      client.send(:_pw_wrap, :chat, {
+        prompt: 'What is the ETA?',
+        model: 'gpt-4o'
+      }) do
+        raise 'test error'
+      end
+    }.to raise_error('test error')
+    expect(PromptWarden).to have_received(:record).with(hash_including(
+      prompt: 'What is the ETA?',
+      model: 'gpt-4o',
+      status: 'failed',
+      alerts: alerts
+    ))
+  end
+  it 'extracts model from class name when not provided' do
+    client = Object.new
+    client.extend(PromptWarden::Instrumentation::Langchain)
+    allow(client).to receive(:class).and_return(double(name: 'Langchain::LLM::OpenAI'))
+    client.send(:_pw_wrap, :chat, {
+      prompt: 'hi'
+    }) do
+      'langchain reply'
+    end
+    expect(PromptWarden).to have_received(:record).with(hash_including(
+      prompt: 'hi',
+      model: 'openai',
+      status: 'ok',
+      alerts: []
+    ))
+  end
+  it 'uses enhanced cost calculation for estimates and actual costs' do
+    client = Object.new
+    client.extend(PromptWarden::Instrumentation::Langchain)
+    # Mock cost calculation calls
+    allow(PromptWarden).to receive(:calculate_cost).with(
+      prompt: 'hi',
+      model: 'gpt-4o'
+    ).and_return(0.003)  # Estimate
+    allow(PromptWarden).to receive(:calculate_cost).with(
+      prompt: 'hi',
+      model: 'gpt-4o'
+    ).and_return(0.005)  # Actual cost (Langchain doesn't provide token counts)
+    client.send(:_pw_wrap, :chat, {
+      prompt: 'hi',
+      model: 'gpt-4o'
+    }) do
+      'langchain reply'
+    end
+    expect(PromptWarden).to have_received(:calculate_cost).with(
+      prompt: 'hi',
+      model: 'gpt-4o'
+    ).twice  # Once for estimate, once for actual
+  end
+end