RubyGems - sentinel_rb - Versions diffs - 0.1.0 - Mend

sentinel_rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +10 -0
data/.rubocop_todo.yml +72 -0
data/.sentinel-test.yml +20 -0
data/.sentinel.yml +29 -0
data/.sentinel.yml.example +74 -0
data/AGENTS.md +87 -0
data/CODE_OF_CONDUCT.md +132 -0
data/LICENSE.txt +21 -0
data/README.md +226 -0
data/Rakefile +12 -0
data/docs/architecture.md +130 -0
data/docs/development.md +376 -0
data/docs/usage.md +238 -0
data/exe/sentinel_rb +6 -0
data/lib/sentinel_rb/analyzer.rb +140 -0
data/lib/sentinel_rb/analyzers/base.rb +53 -0
data/lib/sentinel_rb/analyzers/base_model_usage.rb +188 -0
data/lib/sentinel_rb/analyzers/dangerous_tools.rb +283 -0
data/lib/sentinel_rb/analyzers/few_shot_bias.rb +75 -0
data/lib/sentinel_rb/analyzers/irrelevant_info.rb +164 -0
data/lib/sentinel_rb/analyzers/misinformation.rb +220 -0
data/lib/sentinel_rb/cli.rb +151 -0
data/lib/sentinel_rb/client/base.rb +34 -0
data/lib/sentinel_rb/client/mock.rb +167 -0
data/lib/sentinel_rb/client/openai.rb +167 -0
data/lib/sentinel_rb/client.rb +25 -0
data/lib/sentinel_rb/config.rb +64 -0
data/lib/sentinel_rb/report.rb +224 -0
data/lib/sentinel_rb/version.rb +5 -0
data/lib/sentinel_rb.rb +39 -0
data/sig/sentinel_rb.rbs +4 -0
data/test_prompts/a2_bad_prompt.md +5 -0
data/test_prompts/a2_good_prompt.md +9 -0
data/test_prompts/a3_bad_prompt.md +19 -0
data/test_prompts/a3_good_prompt.md +15 -0
data/test_prompts/a4_bad_prompt.md +13 -0
data/test_prompts/a4_good_prompt.md +11 -0
data/test_prompts/a5_bad_prompt.md +13 -0
data/test_prompts/a5_good_prompt.md +14 -0
data/test_prompts/bad_prompt.md +15 -0
data/test_prompts/comprehensive_good_prompt.md +11 -0
data/test_prompts/good_prompt.md +9 -0
data/test_prompts/multi_bad_prompt.md +11 -0
data/test_prompts/very_bad_prompt.md +7 -0
metadata +149 -0

data/lib/sentinel_rb/analyzers/dangerous_tools.rb ADDED Viewed

@@ -0,0 +1,283 @@
+# frozen_string_literal: true
+require_relative "base"
+module SentinelRb
+  module Analyzers
+    # A5: Dangerous Tools Detection
+    # Detects prompts that reference potentially dangerous tools, techniques, or requests.
+    #
+    # Detection is purely regex-based: every rule in RULE_GROUPS is tested against the
+    # prompt and each rule that matches produces exactly one finding. None of the
+    # patterns use the `m` flag, so `.{0,N}` gaps never span a line break.
+    class DangerousTools < Base
+      ANALYZER_ID = "A5"
+      # Rule groups, keyed by an internal name and listed in evaluation order.
+      # Each group carries the `category` and `suggestion` copied into the finding
+      # details, plus the list of `rules` (pattern / message / level) to test.
+      RULE_GROUPS = {
+        weapons: {
+          category: "weapons",
+          suggestion: "Avoid requesting information about creating dangerous weapons or substances",
+          rules: [
+            {
+              pattern: /\b(how to make|create|build|manufacture).{0,50}(bomb|explosive|grenade|weapon)\b/i,
+              message: "Prompt requests instructions for creating weapons or explosives",
+              level: :error
+            },
+            {
+              pattern: /\b(recipe|instructions|guide).{0,30}(explosive|bomb|poison|toxin)\b/i,
+              message: "Prompt requests dangerous substance recipes",
+              level: :error
+            },
+            {
+              pattern: /\b(3d print|print).{0,20}(gun|weapon|firearm)\b/i,
+              message: "Prompt references 3D printing weapons",
+              level: :error
+            }
+          ]
+        },
+        cyber_security: {
+          category: "cyber_security",
+          suggestion: "For security education, use ethical hacking resources and controlled environments",
+          rules: [
+            {
+              pattern: /\b(hack|exploit|penetrate|breach).{0,30}(system|network|database|server)\b/i,
+              message: "Prompt references hacking or system exploitation",
+              level: :warn
+            },
+            {
+              pattern: /\b(ddos|denial of service|botnet|malware|ransomware)\b/i,
+              message: "Prompt references cyber attack tools or techniques",
+              level: :error
+            },
+            {
+              pattern: /\b(sql injection|xss|cross site|buffer overflow)\b/i,
+              message: "Prompt references specific attack vectors",
+              level: :warn
+            },
+            {
+              pattern: /\b(keylogger|trojan|backdoor|rootkit)\b/i,
+              message: "Prompt references malicious software types",
+              level: :error
+            }
+          ]
+        },
+        illegal_substances: {
+          category: "illegal_substances",
+          suggestion: "Seek information about substance abuse prevention and treatment resources instead",
+          rules: [
+            {
+              pattern: /\b(how to make|synthesize|produce|manufacture).{0,50}(meth|cocaine|heroin|fentanyl|lsd)\b/i,
+              message: "Prompt requests illegal drug manufacturing information",
+              level: :error
+            },
+            {
+              pattern: /\b(recipe|synthesis|production).{0,30}(amphetamine|mdma|ecstasy)\b/i,
+              message: "Prompt seeks controlled substance production methods",
+              level: :error
+            },
+            {
+              pattern: /\b(grow|cultivate).{0,20}(cannabis|marijuana).{0,30}(illegal|hidden)\b/i,
+              message: "Prompt references illegal cultivation methods",
+              level: :warn
+            }
+          ]
+        },
+        financial_fraud: {
+          category: "financial_fraud",
+          suggestion: "For financial security education, consult legitimate cybersecurity resources",
+          rules: [
+            {
+              pattern: /\b(credit card|debit card).{0,30}(fraud|scam|steal|clone)\b/i,
+              message: "Prompt references credit card fraud techniques",
+              level: :error
+            },
+            {
+              pattern: /\b(phishing|social engineering).{0,30}(bank|financial|password)\b/i,
+              message: "Prompt references financial phishing techniques",
+              level: :warn
+            },
+            {
+              pattern: /\b(money laundering|tax evasion|embezzlement)\b/i,
+              message: "Prompt references financial crimes",
+              level: :warn
+            },
+            {
+              pattern: /\b(counterfeit|fake).{0,20}(money|currency|bills)\b/i,
+              message: "Prompt references counterfeiting currency",
+              level: :error
+            }
+          ]
+        },
+        privacy_violation: {
+          category: "privacy_violation",
+          suggestion: "Respect privacy rights and use legitimate channels for information gathering",
+          rules: [
+            {
+              pattern: /\b(dox|doxxing|personal information).{0,30}(find|locate|discover)\b/i,
+              message: "Prompt may reference doxxing or privacy violation techniques",
+              level: :warn
+            },
+            {
+              pattern: /\b(stalk|stalking|track|surveillance).{0,30}(person|individual|someone)\b/i,
+              message: "Prompt references stalking or unauthorized surveillance",
+              level: :error
+            },
+            {
+              pattern: /\b(identity theft|impersonate|assume identity)\b/i,
+              message: "Prompt references identity theft techniques",
+              level: :error
+            },
+            {
+              pattern: /\b(spy|spying|eavesdrop).{0,30}(secretly|hidden|covert)\b/i,
+              message: "Prompt references covert surveillance techniques",
+              level: :warn
+            }
+          ]
+        }
+      }.freeze
+      # Stores the keyword lists, taking each from `config` when present and
+      # falling back to the built-in defaults otherwise.
+      # NOTE(review): none of the checks in this class read these three lists;
+      # the regex rules above are what actually drive detection.
+      def initialize(prompt, config, client)
+        super
+        @dangerous_keywords = config["dangerous_keywords"] || default_dangerous_keywords
+        @weapon_keywords = config["weapon_keywords"] || default_weapon_keywords
+        @cyber_keywords = config["cyber_keywords"] || default_cyber_keywords
+      end
+      # Analyzes the prompt held in @prompt (presumably assigned by Base#initialize).
+      def call
+        analyze(@prompt)
+      end
+      # Runs every rule group against `prompt`.
+      # Returns an array of findings ordered weapons, cyber security, illegal
+      # substances, financial fraud, privacy violations; empty when nothing matches.
+      def analyze(prompt)
+        [
+          check_weapon_creation(prompt),
+          check_cyber_attacks(prompt),
+          check_illegal_substances(prompt),
+          check_financial_fraud(prompt),
+          check_privacy_violations(prompt)
+        ].flatten(1)
+      end
+      private
+      # Weapon/explosive creation requests.
+      def check_weapon_creation(prompt)
+        findings_for(:weapons, prompt)
+      end
+      # Cyber attack tools/techniques.
+      def check_cyber_attacks(prompt)
+        findings_for(:cyber_security, prompt)
+      end
+      # Illegal substance creation.
+      def check_illegal_substances(prompt)
+        findings_for(:illegal_substances, prompt)
+      end
+      # Financial fraud techniques.
+      def check_financial_fraud(prompt)
+        findings_for(:financial_fraud, prompt)
+      end
+      # Identity theft/privacy violations.
+      def check_privacy_violations(prompt)
+        findings_for(:privacy_violation, prompt)
+      end
+      # Builds one finding per rule of `group_name` whose pattern matches `prompt`,
+      # preserving the order in which the rules are declared.
+      def findings_for(group_name, prompt)
+        group = RULE_GROUPS.fetch(group_name)
+        matched = group[:rules].select { |rule| prompt.match?(rule[:pattern]) }
+        matched.map do |rule|
+          create_finding(
+            id: ANALYZER_ID,
+            level: rule[:level],
+            message: rule[:message],
+            details: {
+              category: group[:category],
+              pattern_matched: rule[:pattern].source,
+              suggestion: group[:suggestion]
+            }
+          )
+        end
+      end
+      # Built-in fallback for the "dangerous_keywords" config entry.
+      def default_dangerous_keywords
+        %w[
+          bomb explosive weapon gun firearm knife blade
+          poison toxin chemical biological nuclear radioactive
+          hack exploit malware virus trojan ransomware
+          fraud scam phishing counterfeit
+          drug cocaine heroin meth amphetamine
+        ]
+      end
+      # Built-in fallback for the "weapon_keywords" config entry.
+      def default_weapon_keywords
+        %w[
+          bomb explosive grenade dynamite c4 tnt
+          gun pistol rifle shotgun firearm ammunition
+          knife blade sword machete weapon
+          poison gas chemical biological agent
+        ]
+      end
+      # Built-in fallback for the "cyber_keywords" config entry.
+      def default_cyber_keywords
+        %w[
+          hack exploit penetration breach vulnerability
+          malware virus trojan backdoor rootkit
+          ddos botnet ransomware keylogger spyware
+          injection overflow xss csrf
+        ]
+      end
+    end
+  end
+end

data/lib/sentinel_rb/analyzers/few_shot_bias.rb ADDED Viewed

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+require_relative "base"
+module SentinelRb
+  module Analyzers
+    # A3: Few-shot Bias Detection
+    # Detects potential bias in few-shot examples that could skew model outputs
+    class FewShotBias < Base
+      ANALYZER_ID = "A3"
+      def call
+        findings = []
+        # Look for example patterns in the prompt
+        if has_examples?(@prompt)
+          # Check for gender bias patterns
+          findings.concat(check_simple_gender_bias(@prompt))
+        end
+        findings
+      end
+      private
+      def has_examples?(prompt)
+        example_indicators = [
+          /example\s*\d*:/i,
+          /input:\s*.+output:/i,
+          /q:\s*.+a:/i
+        ]
+        example_indicators.any? { |pattern| prompt.match?(pattern) }
+      end
+      def check_simple_gender_bias(prompt)
+        findings = []
+        # Count gender pronouns
+        male_count = prompt.scan(/\b(he|him|his|man|men|male)\b/i).length
+        female_count = prompt.scan(/\b(she|her|hers|woman|women|female)\b/i).length
+        total_gender = male_count + female_count
+        return findings if total_gender < 3 # Need at least 3 references to detect bias
+        # Check for significant imbalance
+        max_count = [male_count, female_count].max
+        bias_ratio = max_count.to_f / total_gender
+        divergence_threshold = @config["divergence_threshold"] || 0.25
+        if bias_ratio > (1.0 - divergence_threshold)
+          dominant_gender = male_count > female_count ? "male" : "female"
+          findings << create_finding(
+            id: ANALYZER_ID,
+            level: :warn,
+            message: "Few-shot examples show potential gender bias (#{(bias_ratio * 100).round(1)}% #{dominant_gender} references)",
+            details: {
+              male_references: male_count,
+              female_references: female_count,
+              bias_ratio: bias_ratio.round(3),
+              threshold: divergence_threshold,
+              suggestions: [
+                "Include more balanced gender representation in examples",
+                "Use gender-neutral examples when possible",
+                "Vary pronouns and names across examples"
+              ]
+            }
+          )
+        end
+        findings
+      end
+    end
+  end
+end

data/lib/sentinel_rb/analyzers/irrelevant_info.rb ADDED Viewed

@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+require_relative "base"
+module SentinelRb
+  module Analyzers
+    # A1: Irrelevant Information Detector
+    # Detects prompts containing irrelevant or noisy information that could
+    # degrade LLM performance or confuse the model.
+    class IrrelevantInfo < Base
+      ANALYZER_ID = "A1"
+      def call
+        findings = []
+        begin
+          # Get relevance analysis from LLM
+          analysis = @client.analyze_content(@prompt)
+          relevance_score = analysis[:relevance_score]
+          threshold = @config.relevance_threshold
+          # Check if relevance score is below threshold
+          if threshold_exceeded?(relevance_score, threshold, higher_is_better: true)
+            findings << create_finding(
+              id: ANALYZER_ID,
+              level: :warn,
+              message: "Prompt contains potentially irrelevant information (relevance score: #{relevance_score.round(3)} < threshold: #{threshold})",
+              details: {
+                relevance_score: relevance_score,
+                threshold: threshold,
+                raw_response: analysis[:raw_response],
+                suggestions: generate_suggestions(relevance_score)
+              }
+            )
+          end
+        rescue StandardError => e
+          # If LLM analysis fails, we'll rely on heuristic checks only
+          # Log the error but don't fail the entire analysis
+          if @config["log_level"] == "debug"
+            warn "Warning: LLM analysis failed for irrelevant info detection: #{e.message}"
+          end
+        end
+        # Additional heuristic checks (these work even if LLM fails)
+        findings.concat(check_length_ratio)
+        findings.concat(check_repetitive_content)
+        findings.concat(check_off_topic_markers)
+        findings
+      end
+      private
+      # Check if prompt has unusual length patterns that might indicate noise
+      def check_length_ratio
+        return [] if @prompt.length < 100
+        sentences = @prompt.split(/[.!?]+/).reject(&:empty?)
+        return [] if sentences.length < 3
+        # Calculate variance in sentence lengths
+        lengths = sentences.map(&:length)
+        avg_length = lengths.sum.to_f / lengths.length
+        variance = lengths.map { |len| (len - avg_length)**2 }.sum / lengths.length
+        std_dev = Math.sqrt(variance)
+        # Flag if there's high variance (suggesting mix of very long and short sentences)
+        coefficient_of_variation = std_dev / avg_length
+        if coefficient_of_variation > 1.5
+          [create_finding(
+            id: ANALYZER_ID,
+            level: :info,
+            message: "Prompt has highly variable sentence lengths, which may indicate mixed content types",
+            details: {
+              coefficient_of_variation: coefficient_of_variation.round(3),
+              avg_sentence_length: avg_length.round(1),
+              sentence_count: sentences.length
+            }
+          )]
+        else
+          []
+        end
+      end
+      # Check for repetitive content that might be noise
+      def check_repetitive_content
+        words = @prompt.downcase.scan(/\w+/)
+        return [] if words.length < 5 # Reduced threshold for shorter test prompts
+        # Count word frequencies
+        word_counts = Hash.new(0)
+        words.each { |word| word_counts[word] += 1 }
+        # Find words that appear unusually often
+        avg_frequency = words.length.to_f / word_counts.keys.length
+        repetitive_words = word_counts.select do |word, count|
+          count > avg_frequency * 2 && word.length > 2 # More lenient thresholds
+        end
+        if repetitive_words.any?
+          [create_finding(
+            id: ANALYZER_ID,
+            level: :info,
+            message: "Prompt contains repetitive words that may indicate redundant content",
+            details: {
+              repetitive_words: repetitive_words.keys.take(5),
+              repetition_ratio: repetitive_words.values.sum.to_f / words.length
+            }
+          )]
+        else
+          []
+        end
+      end
+      # Check for common markers of off-topic content
+      def check_off_topic_markers
+        findings = []
+        # Common patterns that might indicate irrelevant content
+        noise_patterns = [
+          /\b(disclaimer|legal notice|copyright|terms of service)\b/i,
+          /\b(marketing|advertisement|promotional|sponsor)\b/i,
+          /\b(lorem ipsum|placeholder|example text|sample content)\b/i,
+          /\b(todo|fixme|note to self|reminder)\b/i
+        ]
+        noise_patterns.each_with_index do |pattern, index|
+          next unless @prompt.match?(pattern)
+          findings << create_finding(
+            id: ANALYZER_ID,
+            level: :info,
+            message: "Prompt contains potential noise markers (pattern #{index + 1})",
+            details: {
+              pattern_matched: pattern.source,
+              matches: @prompt.scan(pattern).flatten.uniq.take(3)
+            }
+          )
+        end
+        findings
+      end
+      # Generate helpful suggestions based on relevance score
+      def generate_suggestions(score)
+        suggestions = []
+        if score < 0.3
+          suggestions << "Consider rewriting the prompt to focus on a single, clear objective"
+          suggestions << "Remove any background information not directly relevant to the task"
+        elsif score < 0.5
+          suggestions << "Try to make the main task or question more prominent"
+          suggestions << "Consider breaking complex prompts into simpler, focused parts"
+        else
+          suggestions << "Consider minor refinements to improve clarity and focus"
+        end
+        suggestions << "Review the prompt for any repetitive or redundant information"
+        suggestions
+      end
+    end
+  end
+end