RubyGems - sentinel_rb - Versions diffs - 0.1.0 - Mend

sentinel_rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +10 -0
data/.rubocop_todo.yml +72 -0
data/.sentinel-test.yml +20 -0
data/.sentinel.yml +29 -0
data/.sentinel.yml.example +74 -0
data/AGENTS.md +87 -0
data/CODE_OF_CONDUCT.md +132 -0
data/LICENSE.txt +21 -0
data/README.md +226 -0
data/Rakefile +12 -0
data/docs/architecture.md +130 -0
data/docs/development.md +376 -0
data/docs/usage.md +238 -0
data/exe/sentinel_rb +6 -0
data/lib/sentinel_rb/analyzer.rb +140 -0
data/lib/sentinel_rb/analyzers/base.rb +53 -0
data/lib/sentinel_rb/analyzers/base_model_usage.rb +188 -0
data/lib/sentinel_rb/analyzers/dangerous_tools.rb +283 -0
data/lib/sentinel_rb/analyzers/few_shot_bias.rb +75 -0
data/lib/sentinel_rb/analyzers/irrelevant_info.rb +164 -0
data/lib/sentinel_rb/analyzers/misinformation.rb +220 -0
data/lib/sentinel_rb/cli.rb +151 -0
data/lib/sentinel_rb/client/base.rb +34 -0
data/lib/sentinel_rb/client/mock.rb +167 -0
data/lib/sentinel_rb/client/openai.rb +167 -0
data/lib/sentinel_rb/client.rb +25 -0
data/lib/sentinel_rb/config.rb +64 -0
data/lib/sentinel_rb/report.rb +224 -0
data/lib/sentinel_rb/version.rb +5 -0
data/lib/sentinel_rb.rb +39 -0
data/sig/sentinel_rb.rbs +4 -0
data/test_prompts/a2_bad_prompt.md +5 -0
data/test_prompts/a2_good_prompt.md +9 -0
data/test_prompts/a3_bad_prompt.md +19 -0
data/test_prompts/a3_good_prompt.md +15 -0
data/test_prompts/a4_bad_prompt.md +13 -0
data/test_prompts/a4_good_prompt.md +11 -0
data/test_prompts/a5_bad_prompt.md +13 -0
data/test_prompts/a5_good_prompt.md +14 -0
data/test_prompts/bad_prompt.md +15 -0
data/test_prompts/comprehensive_good_prompt.md +11 -0
data/test_prompts/good_prompt.md +9 -0
data/test_prompts/multi_bad_prompt.md +11 -0
data/test_prompts/very_bad_prompt.md +7 -0
metadata +149 -0

data/lib/sentinel_rb/analyzers/misinformation.rb ADDED Viewed

@@ -0,0 +1,220 @@
+# frozen_string_literal: true
+require_relative "base"
+module SentinelRb
+  module Analyzers
+    # A2: Misinformation Detection
+    # Detects prompts that may contain or encourage the generation of misinformation
+    class Misinformation < Base
+      def initialize(prompt, config, client)
+        super
+        @fact_check_threshold = config["fact_check_threshold"] || 0.7
+        @misinformation_keywords = config["misinformation_keywords"] || default_misinformation_keywords
+      end
+      def call
+        analyze(@prompt)
+      end
+      def analyze(prompt)
+        findings = []
+        # Check for explicit misinformation instruction patterns
+        findings.concat(check_misinformation_instructions(prompt))
+        # Check for conspiracy theory keywords
+        findings.concat(check_conspiracy_patterns(prompt))
+        # Check for medical/health misinformation patterns
+        findings.concat(check_medical_misinformation(prompt))
+        # For statements that claim to be facts, attempt fact-checking
+        findings.concat(check_factual_claims(prompt))
+        findings
+      end
+      private
+      def check_misinformation_instructions(prompt)
+        findings = []
+        instruction_patterns = [
+          {
+            pattern: /\b(spread|share|promote|tell people|convince others).{0,20}(false|fake|untrue|misleading)\b/i,
+            message: "Prompt appears to instruct spreading of false information",
+            level: :error
+          },
+          {
+            pattern: /\b(ignore|disregard|dismiss).{0,20}(facts|evidence|science|experts)\b/i,
+            message: "Prompt encourages ignoring factual evidence",
+            level: :warn
+          },
+          {
+            pattern: /\b(make up|fabricate|invent|create fake).{0,20}(facts|statistics|studies|evidence)\b/i,
+            message: "Prompt requests fabrication of false evidence",
+            level: :error
+          }
+        ]
+        instruction_patterns.each do |pattern_info|
+          matches = prompt.scan(pattern_info[:pattern])
+          next unless matches.any?
+          findings << create_finding(
+            id: "A2",
+            level: pattern_info[:level],
+            message: pattern_info[:message],
+            details: {
+              pattern_matched: pattern_info[:pattern].source,
+              matches: matches.flatten.uniq
+            }
+          )
+        end
+        findings
+      end
+      # Detects references to well-known conspiracy theories.
+      #
+      # All matches across all patterns are counted. A single A2 finding is
+      # emitted when at least one match exists; it is a :warn for one or two
+      # matches and an :error for three or more.
+      #
+      # @param prompt [String] prompt text to scan
+      # @return [Array] empty, or a single finding summarising every match
+      def check_conspiracy_patterns(prompt)
+        conspiracy_patterns = [
+          /\b(covid.{0,10}hoax|vaccine.{0,10}dangerous|5g.{0,10}virus)\b/i,
+          /\b(flat.{0,5}earth|moon.{0,10}landing.{0,10}fake)\b/i,
+          /\b(chemtrails|lizard.{0,5}people|illuminati.{0,10}control)\b/i,
+          /\b(election.{0,10}(stolen|rigged)|deep.{0,5}state)\b/i
+        ]
+        # String#scan returns one array of captures per match. In every pattern
+        # the first group spans the whole match, so only that capture is kept.
+        # Flattening all groups would also report inner fragments ("stolen")
+        # and nil for alternatives that did not participate ("deep state").
+        matched_patterns = conspiracy_patterns.flat_map do |pattern|
+          prompt.scan(pattern).map(&:first)
+        end
+        conspiracy_count = matched_patterns.length
+        return [] if conspiracy_count.zero?
+        [
+          create_finding(
+            id: "A2",
+            level: conspiracy_count >= 3 ? :error : :warn,
+            message: "Prompt contains conspiracy theory references (#{conspiracy_count} instances)",
+            details: {
+              conspiracy_count: conspiracy_count,
+              matched_patterns: matched_patterns.uniq,
+              suggestions: [
+                "Consider removing conspiracy theory references",
+                "Focus on factual, evidence-based information",
+                "Verify claims with reliable sources"
+              ]
+            }
+          )
+        ]
+      end
+      # Flags medical/health misinformation: miracle-cure claims, anti-vaccine
+      # statements and "doctors are hiding the cure" narratives.
+      #
+      # Each rule that matches at least once yields one A2 finding carrying the
+      # rule's regex source, the unique captured strings returned by
+      # String#scan, and a fixed list of remediation suggestions.
+      #
+      # @param prompt [String] prompt text to scan
+      # @return [Array] one finding per matching rule, in rule order
+      def check_medical_misinformation(prompt)
+        rules = [
+          {
+            pattern: /\b(cure|heal|treat).{0,20}(cancer|diabetes|covid|aids).{0,20}(naturally|home remedy|without medicine)\b/i,
+            message: "Prompt contains potential medical misinformation about cures",
+            level: :error
+          },
+          {
+            pattern: /\b(vaccines?.{0,10}(cause|dangerous|harmful|toxic))\b/i,
+            message: "Prompt contains anti-vaccine misinformation",
+            level: :error
+          },
+          {
+            pattern: /\b(doctors?.{0,10}(hiding|concealing).{0,20}(truth|cure))\b/i,
+            message: "Prompt promotes medical conspiracy theories",
+            level: :warn
+          }
+        ]
+        rules.flat_map do |rule|
+          hits = prompt.scan(rule[:pattern])
+          next [] if hits.empty?
+          finding = create_finding(
+            id: "A2",
+            level: rule[:level],
+            message: rule[:message],
+            details: {
+              pattern_matched: rule[:pattern].source,
+              matches: hits.flatten.uniq,
+              suggestions: [
+                "Remove medical misinformation claims",
+                "Consult qualified medical professionals",
+                "Use evidence-based medical information"
+              ]
+            }
+          )
+          [finding]
+        end
+      end
+      def check_factual_claims(prompt)
+        findings = []
+        # Look for statements that make factual claims
+        factual_claim_patterns = [
+          /studies show that/i,
+          /research proves/i,
+          /scientists have found/i,
+          /according to experts/i,
+          /statistics indicate/i
+        ]
+        claims_found = []
+        factual_claim_patterns.each do |pattern|
+          matches = prompt.scan(/[^.!?]*#{pattern}[^.!?]*[.!?]/)
+          claims_found.concat(matches) if matches.any?
+        end
+        if claims_found.any? && claims_found.length <= 3 # Don't fact-check too many claims
+          claims_found.each do |claim|
+            fact_check_result = @client.fact_check(claim.strip)
+            if fact_check_result[:confidence] < @fact_check_threshold
+              findings << create_finding(
+                id: "A2",
+                level: :info,
+                message: "Factual claim could not be verified with high confidence",
+                details: {
+                  claim: claim.strip,
+                  confidence: fact_check_result[:confidence],
+                  reason: fact_check_result[:reason],
+                  suggestions: [
+                    "Verify the claim with reliable sources",
+                    "Consider adding source citations",
+                    "Use more cautious language for unverified claims"
+                  ]
+                }
+              )
+            end
+          rescue StandardError => e
+            # Fact-checking failed, but don't break the analysis
+            puts "Debug: Fact-checking failed for claim: #{e.message}" if ENV["DEBUG"]
+          end
+        end
+        findings
+      end
+      def default_misinformation_keywords
+        [
+          "fake news", "hoax", "conspiracy", "cover-up", "they don't want you to know",
+          "mainstream media lies", "suppressed truth", "hidden agenda", "false flag"
+        ]
+      end
+    end
+  end
+end

data/lib/sentinel_rb/cli.rb ADDED Viewed

@@ -0,0 +1,151 @@
+# frozen_string_literal: true
+require "thor"
+require_relative "analyzer"
+require_relative "report"
+module SentinelRb
+  # Command Line Interface for SentinelRb
+  class CLI < Thor
+    desc "analyze", "Analyze prompt files for antipatterns"
+    option :glob, type: :string, desc: "Glob pattern for files to analyze"
+    option :files, type: :array, desc: "Specific files to analyze"
+    option :config, type: :string, default: ".sentinel.yml", desc: "Configuration file path"
+    option :format, type: :string, default: "table", desc: "Output format (table, json, detailed)"
+    option :output, type: :string, desc: "Output file path (default: stdout)"
+    option :analyzers, type: :array, desc: "Specific analyzers to run (e.g., A1,A2)"
+    option :no_summary, type: :boolean, default: false, desc: "Skip summary output"
+    option :no_color, type: :boolean, default: false, desc: "Disable colored output"
+    option :quiet, type: :boolean, default: false, desc: "Suppress non-error output"
+    option :verbose, type: :boolean, default: false, desc: "Enable verbose output"
+    # Analyzes the selected prompt files and prints (or writes) a report.
+    #
+    # Steps: load the configuration, resolve the files to analyze, run the
+    # analyzer on each file, format the results and emit them. The process
+    # then exits with status 1 when any finding was reported and 0 otherwise.
+    # Any StandardError raised along the way is reported through error_exit;
+    # the SystemExit raised by exit is not a StandardError and passes through.
+    def analyze
+      settings = load_config(options[:config])
+      engine = SentinelRb::Analyzer.new(settings)
+
+      targets = determine_files(options)
+      error_exit("No files found to analyze. Use --glob or --files to specify files.") if targets.empty?
+      say("Analyzing #{targets.length} files...") unless options[:quiet]
+
+      results = targets.map do |path|
+        say("  Analyzing #{path}...") if options[:verbose]
+        engine.analyze_file(path, analyzer_ids: options[:analyzers])
+      end
+
+      # Colour is used only when not disabled and stdout is a terminal.
+      report = SentinelRb::Report::Formatter.format(
+        results,
+        format: options[:format],
+        show_summary: !options[:no_summary],
+        colorize: !options[:no_color] && $stdout.tty?
+      )
+      output_results(report, options[:output])
+
+      # A non-zero status lets scripts and CI detect that findings exist.
+      findings_total = engine.summarize_results(results)[:total_findings]
+      exit(findings_total.positive? ? 1 : 0)
+    rescue StandardError => e
+      error_exit("Analysis failed: #{e.message}")
+    end
+    desc "version", "Show SentinelRb version"
+    def version
+      say("SentinelRb #{SentinelRb::VERSION}")
+    end
+    desc "config", "Show current configuration"
+    option :config, type: :string, default: ".sentinel.yml", desc: "Configuration file path"
+    def config
+      config = load_config(options[:config])
+      say("Configuration loaded from: #{options[:config]}")
+      say("")
+      config.to_h.each do |key, value|
+        say("#{key}: #{value}")
+      end
+    end
+    desc "test_connection", "Test connection to LLM provider"
+    option :config, type: :string, default: ".sentinel.yml", desc: "Configuration file path"
+    def test_connection
+      config = load_config(options[:config])
+      client = SentinelRb::Client::Factory.create(config)
+      say("Testing connection to #{config.provider}...")
+      begin
+        # Test with a simple analysis
+        result = client.analyze_content("This is a test prompt for connection verification.")
+        if result[:relevance_score]
+          say("✅ Connection successful!")
+          say("Test analysis score: #{result[:relevance_score].round(3)}")
+        else
+          error_exit("❌ Connection failed: No response received")
+        end
+      rescue StandardError => e
+        error_exit("❌ Connection failed: #{e.message}")
+      end
+    end
+    no_commands do
+      private
+      def load_config(config_path)
+        if !File.exist?(config_path) && !options[:quiet]
+          say("Configuration file #{config_path} not found, using defaults.", :yellow)
+        end
+        SentinelRb::Config.load(config_path)
+      rescue StandardError => e
+        error_exit("Failed to load configuration: #{e.message}")
+      end
+      def determine_files(options)
+        files = []
+        files.concat(options[:files]) if options[:files]
+        files.concat(Dir.glob(options[:glob])) if options[:glob]
+        # Default glob if no files specified
+        if files.empty?
+          default_patterns = [
+            "prompts/**/*.{md,txt,json}",
+            "**/*.prompt",
+            "**/*.prompt.md"
+          ]
+          default_patterns.each do |pattern|
+            found_files = Dir.glob(pattern)
+            if found_files.any?
+              files.concat(found_files)
+              break
+            end
+          end
+        end
+        files.uniq.select { |f| File.file?(f) }
+      end
+      def output_results(content, output_file)
+        if output_file
+          File.write(output_file, content)
+          say("Results written to #{output_file}") unless options[:quiet]
+        else
+          say(content)
+        end
+      end
+      def error_exit(message)
+        say("Error: #{message}", :red)
+        exit(1)
+      end
+    end
+  end
+end

data/lib/sentinel_rb/client/base.rb ADDED Viewed

@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+module SentinelRb
+  module Client
+    # Base class for LLM client implementations
+    class Base
+      def initialize(config)
+        @config = config
+      end
+      # Abstract method: Calculate semantic similarity between two texts
+      # @param text1 [String] First text
+      # @param text2 [String] Second text
+      # @return [Float] Similarity score between 0.0 and 1.0
+      def similarity(text1, text2)
+        raise NotImplementedError, "Subclasses must implement #similarity"
+      end
+      # Abstract method: Check factual accuracy of a statement
+      # @param statement [String] Statement to fact-check
+      # @return [Hash] Result with :accurate boolean and :confidence score
+      def fact_check(statement)
+        raise NotImplementedError, "Subclasses must implement #fact_check"
+      end
+      # Abstract method: Analyze content for relevance and quality
+      # @param prompt [String] Prompt text to analyze
+      # @return [Hash] Analysis results
+      def analyze_content(prompt)
+        raise NotImplementedError, "Subclasses must implement #analyze_content"
+      end
+    end
+  end
+end

data/lib/sentinel_rb/client/mock.rb ADDED Viewed

@@ -0,0 +1,167 @@
+# frozen_string_literal: true
+require_relative "base"
+module SentinelRb
+  module Client
+    # Enhanced mock client with improved detection for testing
+    class Mock < Base
+      def initialize(config)
+        super
+        @relevance_scores = config["mock_scores"] || {}
+      end
+      def similarity(text1, text2)
+        # Enhanced similarity calculation with Japanese support
+        words1 = extract_words(text1)
+        words2 = extract_words(text2)
+        return 0.0 if words1.empty? || words2.empty?
+        intersection = (words1 & words2).length
+        union = (words1 | words2).length
+        intersection.to_f / union
+      end
+      def analyze_content(prompt)
+        # Enhanced mock analysis with better pattern detection
+        score = calculate_mock_relevance(prompt)
+        {
+          relevance_score: score,
+          raw_response: score.to_s
+        }
+      end
+      def fact_check(_statement)
+        # Mock fact-checking - always returns neutral result
+        {
+          accurate: true,
+          confidence: 0.8,
+          reason: "Mock fact-check: No real verification performed"
+        }
+      end
+      private
+      def extract_words(text)
+        # Extract words supporting both English and Japanese
+        text.downcase.scan(/[\w\p{Hiragana}\p{Katakana}\p{Han}]+/)
+      end
+      # Heuristic relevance score used in place of a real LLM call.
+      #
+      # Starts from a base score of 0.75 and subtracts penalties for marketing
+      # language, noise markers, repeated words, excessive capitalization and
+      # fragmented sentences, plus an extra penalty when marketing and noise
+      # matches together reach five or more. The result is clamped to 0.0..1.0.
+      # When ENV["DEBUG"] is set, each applied penalty is printed to stdout.
+      #
+      # @param prompt [String] prompt text to score
+      # @return [Float] relevance score between 0.0 and 1.0
+      def calculate_mock_relevance(prompt)
+        # Enhanced scoring with more aggressive detection
+        score = 0.75 # Start with medium-high base score
+        # More comprehensive marketing language detection
+        # NOTE(review): the Japanese terms are wrapped in \b; Japanese text is
+        # usually written without spaces, so these may rarely match - confirm.
+        marketing_patterns = [
+          # English marketing terms
+          /\b(sale|discount|offer|buy now|limited time|special|exclusive|amazing|incredible)\b/i,
+          /\b(deal|bargain|cheap|free|bonus|gift|prize|winner|congratulations)\b/i,
+          /\b(urgent|hurry|act now|don't miss|last chance|final call)\b/i,
+          # Japanese marketing terms
+          /\b(セール|割引|特別|お得|今すぐ|限定|無料|プレゼント|キャンペーン)\b/i,
+          /\b(急げ|お急ぎ|見逃すな|最後|チャンス|特価|激安)\b/i,
+          # Punctuation patterns
+          /!{2,}/, # Multiple exclamation marks
+          /？{2,}/, # Multiple question marks
+          /[★☆]{2,}/ # Multiple stars
+        ]
+        marketing_count = 0
+        # Every match of every marketing pattern costs 0.15.
+        marketing_patterns.each do |pattern|
+          matches = prompt.scan(pattern).length
+          next unless matches.positive?
+          marketing_count += matches
+          penalty = matches * 0.15 # More aggressive penalty
+          score -= penalty
+          puts "Debug: Marketing pattern found: #{pattern.source} (#{matches} matches, -#{penalty})" if ENV["DEBUG"]
+        end
+        # Enhanced noise marker detection
+        # The last three patterns use a greedy ".*", which does not cross
+        # newlines here, so each counts at most one match per line.
+        noise_patterns = [
+          /\b(todo|fixme|disclaimer|note|warning|注意|注記)\b/i,
+          /\b(legal notice|copyright|terms|conditions|利用規約|法的免責|免責事項)\b/i,
+          /\b(placeholder|example|sample|template|テンプレート|例)\b/i,
+          /※.*/, # Japanese note markers
+          /\[.*\]/, # Bracketed content (often metadata)
+          /\{.*\}/ # Braced content (often placeholders)
+        ]
+        noise_count = 0
+        # Every match of every noise pattern costs 0.2.
+        noise_patterns.each do |pattern|
+          matches = prompt.scan(pattern).length
+          next unless matches.positive?
+          noise_count += matches
+          penalty = matches * 0.2 # Significant penalty for noise
+          score -= penalty
+          puts "Debug: Noise pattern found: #{pattern.source} (#{matches} matches, -#{penalty})" if ENV["DEBUG"]
+        end
+        # Enhanced repetition detection
+        # Only prompts with more than five words are checked for repetition.
+        words = extract_words(prompt)
+        if words.length > 5
+          word_counts = Hash.new(0)
+          words.each { |word| word_counts[word] += 1 }
+          # More aggressive repetition detection
+          repetitive_words = word_counts.select do |word, count|
+            count >= 2 && word.length > 1 # Lower threshold, shorter words included
+          end
+          if repetitive_words.any?
+            # Calculate repetition severity
+            # Counts only the extra occurrences: the first use of each
+            # repeated word is not penalized.
+            total_repetitions = repetitive_words.values.sum - repetitive_words.length
+            repetition_ratio = total_repetitions.to_f / words.length
+            penalty = repetition_ratio * 0.5 # Up to 50% penalty for heavy repetition
+            score -= penalty
+            if ENV["DEBUG"]
+              puts "Debug: Repetitive words found: #{repetitive_words.keys} (ratio: #{repetition_ratio.round(3)}, -#{penalty.round(3)})"
+            end
+          end
+        end
+        # Detect excessive capitalization (shouting)
+        # Only ASCII A-Z are counted. For an empty prompt this is 0.0 / 0,
+        # i.e. NaN, which fails the comparison below, so no penalty applies.
+        caps_ratio = prompt.scan(/[A-Z]/).length.to_f / prompt.length
+        if caps_ratio > 0.3 # More than 30% caps
+          caps_penalty = (caps_ratio - 0.3) * 0.4
+          score -= caps_penalty
+          if ENV["DEBUG"]
+            puts "Debug: Excessive capitalization found: #{(caps_ratio * 100).round(1)}% (-#{caps_penalty.round(3)})"
+          end
+        end
+        # Detect very short sentences (fragmented content)
+        # Sentences are split on ASCII and full-width terminators; a sentence
+        # is "short" when it has fewer than three whitespace-separated words.
+        sentences = prompt.split(/[.!?。！？]/).reject(&:empty?)
+        if sentences.length > 3
+          short_sentences = sentences.select { |s| s.strip.split.length < 3 }
+          if short_sentences.length > sentences.length * 0.4 # More than 40% short sentences
+            fragmentation_penalty = 0.15
+            score -= fragmentation_penalty
+            if ENV["DEBUG"]
+              puts "Debug: Fragmented content detected: #{short_sentences.length}/#{sentences.length} short sentences (-#{fragmentation_penalty})"
+            end
+          end
+        end
+        # Apply cumulative penalty for multiple issues
+        # Adds 0.05 for each marketing/noise match beyond the fourth.
+        total_issues = marketing_count + noise_count
+        if total_issues >= 5
+          cumulative_penalty = (total_issues - 4) * 0.05 # Additional penalty for many issues
+          score -= cumulative_penalty
+          puts "Debug: Cumulative penalty for #{total_issues} issues: -#{cumulative_penalty}" if ENV["DEBUG"]
+        end
+        # Ensure score is within bounds
+        final_score = [[score, 0.0].max, 1.0].min
+        if ENV["DEBUG"]
+          puts "Debug: Final relevance score: #{final_score} (marketing: #{marketing_count}, noise: #{noise_count})"
+        end
+        final_score
+      end
+    end
+  end
+end