RubyGems - guardrails-ruby - Versions diffs - 0.1.0 - Mend

guardrails-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

checksums.yaml +7 -0
data/CLAUDE.md +507 -0
data/Gemfile +2 -0
data/LICENSE +21 -0
data/README.md +243 -0
data/Rakefile +9 -0
data/examples/basic.rb +64 -0
data/examples/custom_check.rb +103 -0
data/examples/rails_controller.rb +73 -0
data/guardrails-ruby.gemspec +30 -0
data/lib/guardrails_ruby/check.rb +64 -0
data/lib/guardrails_ruby/checks/competitor_mention.rb +36 -0
data/lib/guardrails_ruby/checks/encoding.rb +33 -0
data/lib/guardrails_ruby/checks/format.rb +35 -0
data/lib/guardrails_ruby/checks/hallucinated_emails.rb +30 -0
data/lib/guardrails_ruby/checks/hallucinated_urls.rb +38 -0
data/lib/guardrails_ruby/checks/keyword_filter.rb +33 -0
data/lib/guardrails_ruby/checks/max_length.rb +30 -0
data/lib/guardrails_ruby/checks/pii.rb +54 -0
data/lib/guardrails_ruby/checks/prompt_injection.rb +36 -0
data/lib/guardrails_ruby/checks/relevance.rb +43 -0
data/lib/guardrails_ruby/checks/topic.rb +25 -0
data/lib/guardrails_ruby/checks/toxic_language.rb +28 -0
data/lib/guardrails_ruby/configuration.rb +15 -0
data/lib/guardrails_ruby/guard.rb +129 -0
data/lib/guardrails_ruby/middleware.rb +30 -0
data/lib/guardrails_ruby/rails/controller.rb +57 -0
data/lib/guardrails_ruby/rails/railtie.rb +20 -0
data/lib/guardrails_ruby/redactors/keyword_redactor.rb +33 -0
data/lib/guardrails_ruby/redactors/pii_redactor.rb +59 -0
data/lib/guardrails_ruby/result.rb +53 -0
data/lib/guardrails_ruby/version.rb +5 -0
data/lib/guardrails_ruby/violation.rb +41 -0
data/lib/guardrails_ruby.rb +38 -0
metadata +115 -0

data/lib/guardrails_ruby/checks/hallucinated_emails.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Output check that flags e-mail addresses in generated text which do not
+    # occur in the source material passed as context[:source_context].
+    # NOTE(review): check_name, direction, @options, pass! and fail! come from
+    # the Check base class, which is defined in another file.
+    class HallucinatedEmails < Check
+      check_name :hallucinated_emails
+      direction :output
+      # Loose address matcher. The TLD class is [A-Za-z]; the former
+      # [A-Z|a-z] also accepted a literal "|", so "a@b.com|c@d.org" was
+      # scanned as one bogus address instead of two real ones.
+      EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/
+      # @param text [String] generated text to inspect
+      # @param context [Hash] may carry :source_context, the text the answer
+      #   is expected to be grounded in (treated as "" when absent)
+      # @return the value of pass! when every address also occurs in the
+      #   source context (case-insensitively), otherwise the value of fail!
+      def call(text, context: {})
+        emails = text.scan(EMAIL_PATTERN)
+        return pass! if emails.empty?
+        known = (context[:source_context] || "").scan(EMAIL_PATTERN).map(&:downcase)
+        hallucinated = emails.reject { |email| known.include?(email.downcase) }
+        return pass! if hallucinated.empty?
+        fail! "Potentially hallucinated emails: #{hallucinated.join(', ')}",
+          action: @options.fetch(:action, :warn),
+          matches: hallucinated
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/hallucinated_urls.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Output check that flags http(s) URLs in generated text which cannot be
+    # traced back to a URL in context[:source_context].
+    # NOTE(review): check_name, direction, @options, pass! and fail! come from
+    # the Check base class, which is defined in another file.
+    class HallucinatedURLs < Check
+      check_name :hallucinated_urls
+      direction :output
+      URL_PATTERN = %r{https?://[^\s<>"{}|\\^`\[\]]+}
+      # Sentence punctuation that URL_PATTERN swallows when a URL ends a clause.
+      TRAILING_PUNCTUATION = /[.,;:!?)']+\z/
+      # Characters allowed to follow a source URL when it is used as a prefix.
+      PATH_DELIMITERS = %w[/ ? #].freeze
+      # @param text [String] generated text to inspect
+      # @param context [Hash] may carry :source_context (treated as "" when absent)
+      # @return the value of pass! when every URL is grounded, otherwise the
+      #   value of fail! listing the URLs exactly as they were scanned
+      def call(text, context: {})
+        urls = text.scan(URL_PATTERN)
+        return pass! if urls.empty?
+        source_urls = (context[:source_context] || "").scan(URL_PATTERN).map { |s| normalize(s) }.uniq
+        hallucinated = urls.reject { |url| grounded?(normalize(url), source_urls) }
+        return pass! if hallucinated.empty?
+        fail! "Potentially hallucinated URLs: #{hallucinated.join(', ')}",
+          action: @options.fetch(:action, :warn),
+          matches: hallucinated
+      end
+      private
+      # A URL is grounded when it equals a source URL or extends one at a
+      # path/query/fragment boundary. A bare start_with? would also accept
+      # "https://example.com.evil.io" for the source "https://example.com".
+      def grounded?(url, source_urls)
+        source_urls.any? do |source|
+          url == source || PATH_DELIMITERS.any? { |delimiter| url.start_with?(source + delimiter) }
+        end
+      end
+      # Canonical form used only for comparison: lower-cased, without trailing
+      # sentence punctuation and without a final slash.
+      def normalize(url)
+        url.downcase.sub(TRAILING_PUNCTUATION, "").chomp("/")
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/keyword_filter.rb ADDED Viewed

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Check for both directions driven by two optional keyword lists read from
+    # @options: :blocklist (fail when any keyword occurs) and :allowlist (fail
+    # unless at least one keyword occurs). Matching is a case-insensitive
+    # substring test.
+    # NOTE(review): @options, pass! and fail! come from the Check base class,
+    # which is defined in another file.
+    class KeywordFilter < Check
+      check_name :keyword_filter
+      direction :both
+      # @param text [String] text to inspect
+      # @param context [Hash] unused by this check
+      # @return the value of fail! for the first rule that is violated
+      #   (blocklist is evaluated before allowlist), otherwise the value of pass!
+      def call(text, context: {})
+        blocklist = @options.fetch(:blocklist, [])
+        allowlist = @options.fetch(:allowlist, [])
+        haystack = text.downcase
+        if blocklist.any?
+          found = blocklist.select { |kw| mentions?(haystack, kw) }
+          if found.any?
+            return fail!("Blocked keywords found: #{found.join(', ')}", matches: found)
+          end
+        end
+        if allowlist.any? && allowlist.none? { |kw| mentions?(haystack, kw) }
+          return fail!("No allowed keywords found. Expected one of: #{allowlist.join(', ')}")
+        end
+        pass!
+      end
+      private
+      # Keywords are coerced with to_s so Symbol entries work, matching how the
+      # Topic check treats its :allowed list; String#include? rejects a Symbol.
+      def mentions?(haystack, keyword)
+        haystack.include?(keyword.to_s.downcase)
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/max_length.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Input check enforcing optional size limits read from @options:
+    # :chars (exact character count) and :tokens (estimated token count).
+    # The character limit is evaluated first; with neither option set the
+    # check always passes.
+    class MaxLength < Check
+      check_name :max_length
+      direction :input
+      # @param text [String] input text to measure
+      # @param context [Hash] unused by this check
+      # @return the value of fail! for the first limit exceeded, else of pass!
+      def call(text, context: {})
+        char_limit = @options[:chars]
+        token_limit = @options[:tokens]
+        if char_limit && text.length > char_limit
+          return fail!("Input exceeds maximum length of #{char_limit} characters (got #{text.length})")
+        end
+        if token_limit && estimate_tokens(text) > token_limit
+          return fail!("Input exceeds maximum length of #{token_limit} tokens (estimated #{estimate_tokens(text)})")
+        end
+        pass!
+      end
+      private
+      # Rough estimate: one token per four characters, rounded up.
+      def estimate_tokens(text)
+        text.length.fdiv(4).ceil
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/pii.rb ADDED Viewed

@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Check for both directions that detects common PII shapes with regular
+    # expressions and supplies a redacted copy of the text alongside the failure.
+    # NOTE(review): pass! and fail! come from the Check base class, which is
+    # defined in another file.
+    class PII < Check
+      check_name :pii
+      direction :both
+      # Detection heuristics keyed by PII type. Hash order is also the order
+      # in which redaction is applied.
+      PATTERNS = {
+        ssn: /\b\d{3}-\d{2}-\d{4}\b/,
+        credit_card: /\b(?:\d{4}[- ]?){3}\d{4}\b/,
+        # TLD class is [A-Za-z]; the former [A-Z|a-z] also matched a literal "|".
+        email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,
+        phone_us: /\b(?:\+?1[-.]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/,
+        ip_address: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/,
+        date_of_birth: /\b(?:DOB|date of birth|born)[:\s]*\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}\b/i
+      }.freeze
+      # Placeholder substituted for each PII type.
+      REDACT_MAP = {
+        ssn: "[SSN REDACTED]",
+        credit_card: "[CC REDACTED]",
+        email: "[EMAIL REDACTED]",
+        phone_us: "[PHONE REDACTED]",
+        ip_address: "[IP REDACTED]",
+        date_of_birth: "[DOB REDACTED]"
+      }.freeze
+      # @param text [String] text to scan
+      # @param context [Hash] unused by this check
+      # @return the value of pass! when nothing matches; otherwise the value of
+      #   fail! with matches: { type => [matched strings] } and sanitized: the
+      #   text with every detected type replaced by its placeholder
+      def call(text, context: {})
+        found = PATTERNS.each_with_object({}) do |(type, pattern), hits|
+          matches = text.scan(pattern)
+          hits[type] = matches unless matches.empty?
+        end
+        return pass! if found.empty?
+        fail! "PII detected: #{found.keys.join(', ')}",
+          matches: found,
+          sanitized: redact(text, found)
+      end
+      private
+      # Returns a copy of text with each detected type replaced. found keeps
+      # PATTERNS' insertion order, so the substitution order is deterministic.
+      def redact(text, found)
+        result = text.dup
+        found.each_key { |type| result.gsub!(PATTERNS[type], REDACT_MAP[type]) }
+        result
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/prompt_injection.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Input check that looks for well-known prompt-injection phrasings.
+    # Purely pattern based: every expression in INJECTION_PATTERNS is tried
+    # against the text, and the sources of all that match are reported.
+    class PromptInjection < Check
+      check_name :prompt_injection
+      direction :input
+      INJECTION_PATTERNS = [
+        /ignore\s+(all\s+)?previous\s+instructions/i,
+        /ignore\s+(all\s+)?above/i,
+        /disregard\s+(all\s+)?previous/i,
+        /you\s+are\s+now\s+(a|an)\s+/i,
+        /pretend\s+(you('re|\s+are)\s+|to\s+be\s+)/i,
+        /act\s+as\s+(a|an|if)\s+/i,
+        /new\s+instructions?[:\s]/i,
+        /system\s*prompt[:\s]/i,
+        /\[\s*system\s*\]/i,
+        /<\s*system\s*>/i,
+        /```\s*(system|instruction)/i,
+        /STOP\.?\s*(forget|ignore|disregard)/i
+      ].freeze
+      # @param text [String] user input to inspect
+      # @param context [Hash] unused by this check
+      # @return the value of pass! when no pattern matches, otherwise the value
+      #   of fail! with matches: the source strings of the matching patterns
+      def call(text, context: {})
+        hits = INJECTION_PATTERNS.select { |pattern| text.match?(pattern) }
+        return pass! if hits.empty?
+        fail!("Prompt injection detected", matches: hits.map(&:source))
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/relevance.rb ADDED Viewed

@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Output check using a keyword-overlap heuristic in place of an LLM judge.
+    # It fails when fewer than 10% of the significant words of context[:input]
+    # occur (as substrings) in the output and the output is longer than 20
+    # characters. Without context[:input], or when the input has no
+    # significant words, the check passes.
+    class Relevance < Check
+      check_name :relevance
+      direction :output
+      # Words ignored when extracting keywords from the input.
+      STOP_WORDS = %w[the a an is are was were be been being have has had do does did
+                      will would shall should may might can could of in to for on with
+                      at by from it its this that these those i me my we our you your
+                      he she they them his her and or but not no if so as how what when
+                      where which who whom why].freeze
+      # @param text [String] generated output
+      # @param context [Hash] reads :input, the prompt the output answers
+      # @return the value of pass! or of fail! (action defaults to :warn)
+      def call(text, context: {})
+        prompt = context[:input]
+        return pass! unless prompt
+        keywords = significant_words(prompt)
+        return pass! if keywords.empty?
+        haystack = text.downcase
+        hits = keywords.count { |word| haystack.include?(word) }
+        coverage = hits.to_f / keywords.size
+        off_topic = coverage < 0.1 && text.length > 20
+        return pass! unless off_topic
+        fail! "Output may not be relevant to the input (low keyword overlap)",
+          action: @options.fetch(:action, :warn)
+      end
+      private
+      # Lower-cased words of at least three characters that are not stop words.
+      def significant_words(text)
+        text.downcase.scan(/\b\w+\b/).select { |word| word.length >= 3 && !STOP_WORDS.include?(word) }
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/topic.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Input check that keeps conversations on a configured set of topics.
+    # The :allowed option lists topic keywords; the input passes when it
+    # contains at least one of them (case-insensitive substring match) or
+    # when the list is empty.
+    class Topic < Check
+      check_name :topic
+      direction :input
+      # @param text [String] user input to inspect
+      # @param context [Hash] unused by this check
+      # @return the value of pass! when on topic, otherwise the value of fail!
+      def call(text, context: {})
+        topics = @options.fetch(:allowed, [])
+        return pass! if topics.empty?
+        haystack = text.downcase
+        off_topic = topics.none? { |topic| haystack.include?(topic.to_s.downcase) }
+        return fail!("Input does not match allowed topics: #{topics.join(', ')}") if off_topic
+        pass!
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/checks/toxic_language.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Checks
+    # Check for both directions that flags threatening language using a small
+    # set of regular expressions (no model-based scoring).
+    class ToxicLanguage < Check
+      check_name :toxic_language
+      direction :both
+      TOXIC_PATTERNS = [
+        /\b(kill|murder|attack)\s+(you|him|her|them)\b/i,
+        /\b(threat(en)?|bomb|terroris[mt])\b/i,
+        /\bi\s+(will|am going to)\s+(hurt|harm|destroy)\b/i
+      ].freeze
+      # @param text [String] text to inspect
+      # @param context [Hash] unused by this check
+      # @return the value of pass! when no pattern matches, otherwise the value
+      #   of fail! with matches: the source strings of the matching patterns
+      def call(text, context: {})
+        hits = TOXIC_PATTERNS.select { |pattern| text.match?(pattern) }
+        return pass! if hits.empty?
+        fail!("Toxic language detected", matches: hits.map(&:source))
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/configuration.rb ADDED Viewed

@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  # Plain settings holder with four read/write attributes.
+  # A fresh instance starts with two separate empty check lists and with
+  # on_violation and judge_llm unset (nil).
+  class Configuration
+    attr_accessor :default_input_checks
+    attr_accessor :default_output_checks
+    attr_accessor :on_violation
+    attr_accessor :judge_llm
+    def initialize
+      @default_input_checks, @default_output_checks = [], []
+      @on_violation = @judge_llm = nil
+    end
+  end
+end

data/lib/guardrails_ruby/guard.rb ADDED Viewed

@@ -0,0 +1,129 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  # Error raised when guarded text must not proceed. In this codebase it is
+  # raised for violations whose action is :block.
+  class Blocked < StandardError; end
+  # Runs ordered lists of input and output checks around an LLM call.
+  # Checks are registered through a small DSL evaluated with instance_eval:
+  #
+  #   guard = Guard.new do
+  #     input  { check :prompt_injection }
+  #     output { check :pii, action: :redact }
+  #   end
+  #
+  class Guard
+    # @yield optional DSL block, evaluated in the context of the new guard
+    def initialize(&block)
+      @input_checks = []
+      @output_checks = []
+      instance_eval(&block) if block
+    end
+    # DSL: define input checks
+    def input(&block)
+      collect_into(@input_checks, &block)
+    end
+    # DSL: define output checks
+    def output(&block)
+      collect_into(@output_checks, &block)
+    end
+    # DSL: register a check by name with options
+    # @raise [ArgumentError] when Check.lookup does not know the name
+    # @raise [RuntimeError] when called outside an input/output block
+    def check(name, **options)
+      check_class = Check.lookup(name)
+      raise ArgumentError, "Unknown check: #{name.inspect}" unless check_class
+      target = @current_checks
+      raise "check must be called inside an input or output block" unless target
+      target << check_class.new(**options)
+    end
+    # Run all input checks in order, return merged Result
+    def check_input(text)
+      run_checks(@input_checks, text)
+    end
+    # Run all output checks in order, return merged Result.
+    # The original input is exposed to the checks as context[:input].
+    def check_output(input: nil, output:, context: {})
+      ctx = context.merge(input: input)
+      run_checks(@output_checks, output, context: ctx)
+    end
+    # Wrap an LLM call with input + output guards
+    # @param user_input [String] raw user text
+    # @param context [Hash] extra context handed to the output checks
+    # @yieldparam sanitized_input the input after input checks
+    # @return the sanitized output, or the raw output when none is available
+    # @raise [Blocked] when any violation has action :block
+    def call(user_input, context: {})
+      input_result = check_input(user_input)
+      handle_violations(input_result)
+      sanitized_input = input_result.sanitized || user_input
+      raw_output = yield(sanitized_input)
+      output_result = check_output(input: sanitized_input, output: raw_output, context: context)
+      handle_violations(output_result)
+      output_result.sanitized || raw_output
+    end
+    private
+    # Evaluates a DSL block with checks as the registration target. The
+    # target is cleared in an ensure clause so that a block which raises
+    # (for example on an unknown check name) cannot leave a stale target
+    # behind for later stray check calls. Returns nil.
+    def collect_into(checks, &block)
+      @current_checks = checks
+      instance_eval(&block)
+      nil
+    ensure
+      @current_checks = nil
+    end
+    # Runs each check against the text as sanitized by the checks before it
+    # and returns a FinalResult carrying all violations plus the final text.
+    def run_checks(checks, text, context: {})
+      combined = Result.new(original_text: text)
+      current_text = text
+      checks.each do |chk|
+        outcome = chk.call(current_text, context: context)
+        result = Result.new(original_text: current_text, violations: outcome.violations)
+        # Apply redaction so subsequent checks see sanitized text
+        sanitized = result.sanitized
+        current_text = sanitized if sanitized && sanitized != current_text
+        combined = combined.merge(result)
+      end
+      FinalResult.new(original_text: text, violations: combined.violations, final_text: current_text)
+    end
+    # Reports every violation to the configured on_violation callback (when
+    # set) and then applies its action: :block raises Blocked with the
+    # violation detail, :warn emits a warning, :log and :redact need no
+    # further work here.
+    def handle_violations(result)
+      result.violations.each do |violation|
+        callback = GuardrailsRuby.configuration.on_violation
+        callback.call(violation) if callback
+        case violation.action
+        when :block
+          raise Blocked, violation.detail
+        when :warn
+          warn "[GuardrailsRuby WARN] #{violation}"
+        when :log
+          # silently record - already in violations
+        when :redact
+          # handled via sanitized text
+        end
+      end
+    end
+  end
+  # Result variant returned after a chain of checks has run: in addition to
+  # the violations it remembers the text as left by the last check.
+  class FinalResult < Result
+    # @param original_text text before any check ran
+    # @param violations [Array] violations collected across all checks
+    # @param final_text text after the last check, if any
+    def initialize(original_text: nil, violations: [], final_text: nil)
+      super(original_text: original_text, violations: violations)
+      @final_text = final_text
+    end
+    # Final text when one was recorded, otherwise the original text.
+    # NOTE(review): @original_text is presumably assigned by Result#initialize,
+    # which is defined in another file.
+    def sanitized
+      return @final_text if @final_text
+      @original_text
+    end
+  end
+end

data/lib/guardrails_ruby/middleware.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  # Decorator around an LLM client: chat is routed through a Guard, while
+  # every other message the client responds to is forwarded unchanged.
+  class Middleware
+    # @param client object responding to chat(input, **options)
+    # @yield Guard DSL block describing the input and output checks
+    def initialize(client, &block)
+      @client = client
+      @guard = Guard.new(&block)
+    end
+    # Runs the guard around the client's chat call; the client receives the
+    # input as yielded by the guard together with the untouched options.
+    def chat(input, **options)
+      @guard.call(input) { |safe_input| @client.chat(safe_input, **options) }
+    end
+    def respond_to_missing?(method_name, include_private = false)
+      delegated = @client.respond_to?(method_name, include_private)
+      delegated || super
+    end
+    private
+    # Forwards unknown messages to the client when it responds to them.
+    def method_missing(method_name, *args, **kwargs, &block)
+      return super unless @client.respond_to?(method_name)
+      @client.send(method_name, *args, **kwargs, &block)
+    end
+  end
+end

data/lib/guardrails_ruby/rails/controller.rb ADDED Viewed

@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  # Opt-in controller mixin. Including it adds a class-level guardrails
+  # macro plus private guarded_input / guarded_output helpers.
+  module Controller
+    def self.included(base)
+      base.extend(ClassMethods)
+    end
+    module ClassMethods
+      # Define guardrails for this controller using the Guard DSL.
+      #
+      #   guardrails do
+      #     input { check :prompt_injection; check :pii, action: :redact }
+      #     output { check :pii, action: :redact }
+      #   end
+      #
+      def guardrails(&block)
+        @_guardrails_guard = GuardrailsRuby::Guard.new(&block)
+      end
+      # Guard configured on this class or, failing that, on the nearest
+      # ancestor that configured one. The guard is stored in a class-level
+      # instance variable, which subclasses do not share, so without the
+      # fallback a subclass of a guarded controller would run unguarded.
+      def _guardrails_guard
+        return @_guardrails_guard if @_guardrails_guard
+        return unless respond_to?(:superclass)
+        parent = superclass
+        parent._guardrails_guard if parent.respond_to?(:_guardrails_guard)
+      end
+    end
+    private
+    # Validate and sanitize user input through the configured guardrails.
+    # Defaults to params[:message] if no text is provided.
+    # Raises GuardrailsRuby::Blocked if any check has action: :block.
+    # Returns the sanitized text otherwise (the text itself when no guard is configured).
+    def guarded_input(text = params[:message])
+      guard = self.class._guardrails_guard
+      return text unless guard
+      enforce_guardrails(guard.check_input(text))
+    end
+    # Validate and sanitize LLM output through the configured guardrails.
+    # Raises GuardrailsRuby::Blocked if any check has action: :block.
+    # Returns the sanitized text otherwise (the text itself when no guard is configured).
+    def guarded_output(text)
+      guard = self.class._guardrails_guard
+      return text unless guard
+      enforce_guardrails(guard.check_output(output: text))
+    end
+    # Shared tail of both helpers: returns the sanitized text, or raises
+    # Blocked with the detail of the violation that actually blocks (falling
+    # back to the first violation when none is marked :block).
+    def enforce_guardrails(result)
+      return result.sanitized unless result.blocked?
+      blocking = result.violations.find { |violation| violation.action == :block } || result.violations.first
+      raise GuardrailsRuby::Blocked, blocking&.detail
+    end
+  end
+end

data/lib/guardrails_ruby/rails/railtie.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Rails
+    # Rails integration hook. Both initializers registered here are
+    # placeholders: their blocks contain comments only and execute no code.
+    # The superclass is written as ::Rails::Railtie because this class sits
+    # inside a module that is itself named Rails.
+    class Railtie < ::Rails::Railtie
+      initializer "guardrails_ruby.configure" do
+        # Intentionally empty: nothing is loaded from here.
+        # Defaults are expected to be set by an application initializer such as
+        # config/initializers/guardrails.rb calling GuardrailsRuby.configure.
+      end
+      initializer "guardrails_ruby.controller" do
+        ActiveSupport.on_load(:action_controller) do
+          # Intentionally empty: GuardrailsRuby::Controller is not included
+          # automatically - controllers opt in via:
+          #   include GuardrailsRuby::Controller
+        end
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/redactors/keyword_redactor.rb ADDED Viewed

@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Redactors
+    # Replaces configured keywords in free text with a fixed placeholder.
+    class KeywordRedactor
+      # Initialize with a list of keywords to redact.
+      #
+      #   redactor = KeywordRedactor.new(%w[secret password], replacement: "[HIDDEN]")
+      #   redactor.redact("The secret password is abc123")
+      #   # => "The [HIDDEN] [HIDDEN] is abc123"
+      #
+      # @param keywords [Array<String>] keywords to hide
+      # @param replacement [String] literal text substituted for each keyword
+      def initialize(keywords, replacement: "[REDACTED]")
+        @keywords = keywords
+        @replacement = replacement
+      end
+      # Redact all occurrences of the configured keywords from text (case-insensitive).
+      # Returns a new string; the argument is not modified.
+      #
+      # All keywords are replaced in a single pass over the original text, so
+      # text inserted as a replacement is never scanned again (with keywords
+      # "password" and "hidden" and replacement "[HIDDEN]", sequential passes
+      # produced "[[HIDDEN]]"). The block form of gsub inserts the replacement
+      # literally instead of parsing it for back-references such as \0.
+      def redact(text)
+        pattern = Regexp.union(@keywords.map { |kw| matcher_for(kw) })
+        text.gsub(pattern) { @replacement }
+      end
+      # Detect which keywords are present in text. Returns an array of matched keywords.
+      def detect(text)
+        @keywords.select { |kw| text.match?(matcher_for(kw)) }
+      end
+      private
+      # Case-insensitive whole-keyword matcher. Look-arounds replace \b so that
+      # keywords starting or ending with a non-word character ("c++", "@admin")
+      # can match; for ordinary words the result is the same as \b...\b.
+      def matcher_for(keyword)
+        /(?<!\w)#{Regexp.escape(keyword)}(?!\w)/i
+      end
+    end
+  end
+end

data/lib/guardrails_ruby/redactors/pii_redactor.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+module GuardrailsRuby
+  module Redactors
+    # Standalone PII redaction and detection built on the pattern tables of
+    # Checks::PII, optionally limited to a subset of PII types.
+    class PIIRedactor
+      # Reuse patterns from Checks::PII
+      PATTERNS = GuardrailsRuby::Checks::PII::PATTERNS
+      REDACT_MAP = GuardrailsRuby::Checks::PII::REDACT_MAP
+      # One-shot helper: redact all PII (or only the given types) from text,
+      # returning a new string.
+      #
+      #   GuardrailsRuby::Redactors::PIIRedactor.redact("My SSN is 123-45-6789")
+      #   # => "My SSN is [SSN REDACTED]"
+      #
+      def self.redact(text, types: nil)
+        new(types: types).redact(text)
+      end
+      # Build a redactor, optionally restricted to some PII types. Type names
+      # are converted to symbols; nil means every known type.
+      #
+      #   redactor = PIIRedactor.new(types: [:ssn, :email])
+      #   redactor.redact("Call 555-123-4567, SSN 123-45-6789")
+      #   # => "Call 555-123-4567, SSN [SSN REDACTED]"
+      #
+      def initialize(types: nil)
+        @types = types&.map(&:to_sym)
+      end
+      # Returns a copy of text in which every match of an active pattern is
+      # replaced by that type's placeholder. The argument is left untouched.
+      def redact(text)
+        active_patterns.reduce(text.dup) do |redacted, (type, pattern)|
+          redacted.gsub(pattern, REDACT_MAP[type])
+        end
+      end
+      # Reports PII without changing the text: a hash of { type => [matches] }
+      # containing only the types that matched at least once.
+      def detect(text)
+        active_patterns.each_with_object({}) do |(type, pattern), found|
+          hits = text.scan(pattern)
+          found[type] = hits unless hits.empty?
+        end
+      end
+      private
+      # Pattern table in effect: all of PATTERNS, or only the selected types
+      # (kept in PATTERNS order).
+      def active_patterns
+        return PATTERNS unless @types
+        PATTERNS.select { |type, _pattern| @types.include?(type) }
+      end
+    end
+  end
+end