sentinel_rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +10 -0
  4. data/.rubocop_todo.yml +72 -0
  5. data/.sentinel-test.yml +20 -0
  6. data/.sentinel.yml +29 -0
  7. data/.sentinel.yml.example +74 -0
  8. data/AGENTS.md +87 -0
  9. data/CODE_OF_CONDUCT.md +132 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +226 -0
  12. data/Rakefile +12 -0
  13. data/docs/architecture.md +130 -0
  14. data/docs/development.md +376 -0
  15. data/docs/usage.md +238 -0
  16. data/exe/sentinel_rb +6 -0
  17. data/lib/sentinel_rb/analyzer.rb +140 -0
  18. data/lib/sentinel_rb/analyzers/base.rb +53 -0
  19. data/lib/sentinel_rb/analyzers/base_model_usage.rb +188 -0
  20. data/lib/sentinel_rb/analyzers/dangerous_tools.rb +283 -0
  21. data/lib/sentinel_rb/analyzers/few_shot_bias.rb +75 -0
  22. data/lib/sentinel_rb/analyzers/irrelevant_info.rb +164 -0
  23. data/lib/sentinel_rb/analyzers/misinformation.rb +220 -0
  24. data/lib/sentinel_rb/cli.rb +151 -0
  25. data/lib/sentinel_rb/client/base.rb +34 -0
  26. data/lib/sentinel_rb/client/mock.rb +167 -0
  27. data/lib/sentinel_rb/client/openai.rb +167 -0
  28. data/lib/sentinel_rb/client.rb +25 -0
  29. data/lib/sentinel_rb/config.rb +64 -0
  30. data/lib/sentinel_rb/report.rb +224 -0
  31. data/lib/sentinel_rb/version.rb +5 -0
  32. data/lib/sentinel_rb.rb +39 -0
  33. data/sig/sentinel_rb.rbs +4 -0
  34. data/test_prompts/a2_bad_prompt.md +5 -0
  35. data/test_prompts/a2_good_prompt.md +9 -0
  36. data/test_prompts/a3_bad_prompt.md +19 -0
  37. data/test_prompts/a3_good_prompt.md +15 -0
  38. data/test_prompts/a4_bad_prompt.md +13 -0
  39. data/test_prompts/a4_good_prompt.md +11 -0
  40. data/test_prompts/a5_bad_prompt.md +13 -0
  41. data/test_prompts/a5_good_prompt.md +14 -0
  42. data/test_prompts/bad_prompt.md +15 -0
  43. data/test_prompts/comprehensive_good_prompt.md +11 -0
  44. data/test_prompts/good_prompt.md +9 -0
  45. data/test_prompts/multi_bad_prompt.md +11 -0
  46. data/test_prompts/very_bad_prompt.md +7 -0
  47. metadata +149 -0
@@ -0,0 +1,220 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+
5
+ module SentinelRb
6
+ module Analyzers
7
+ # A2: Misinformation Detection
8
+ # Detects prompts that may contain or encourage the generation of misinformation
9
+ class Misinformation < Base
10
# Builds the A2 analyzer and resolves its tunables from +config+.
#
# All three arguments are forwarded unchanged to the parent initializer.
#
# @param prompt [String] prompt text (presumably stored by the base class as @prompt)
# @param config [#[]] settings source; "fact_check_threshold" (fallback 0.7) and
#   "misinformation_keywords" (fallback: #default_misinformation_keywords) are read
# @param client [Object] LLM client (presumably stored by the base class as @client)
def initialize(prompt, config, client)
  super

  configured_threshold = config["fact_check_threshold"]
  configured_keywords = config["misinformation_keywords"]

  @fact_check_threshold = configured_threshold || 0.7
  # NOTE(review): no check visible in this class reads @misinformation_keywords.
  @misinformation_keywords = configured_keywords || default_misinformation_keywords
end
15
+
16
# Entry point: runs every A2 check against the prompt held by this analyzer.
#
# @return [Array] whatever #analyze returns for @prompt
def call
  analyze(@prompt)
end
19
+
20
# Runs the four A2 checks in a fixed order and concatenates their findings.
#
# Order: explicit misinformation instructions, conspiracy references,
# medical misinformation, then fact-checking of factual claims.
#
# @param prompt [String] text to inspect
# @return [Array] findings from all checks, in check order
def analyze(prompt)
  checks = %i[
    check_misinformation_instructions
    check_conspiracy_patterns
    check_medical_misinformation
    check_factual_claims
  ]

  # `send` because the individual checks are private helpers.
  checks.flat_map { |check| send(check, prompt) }
end
37
+
38
+ private
39
+
40
# Flags prompts that explicitly ask for false information to be spread,
# evidence to be ignored, or evidence to be fabricated.
#
# Each rule yields at most one finding, however many times it matches.
#
# @param prompt [String] text to inspect
# @return [Array] one finding (built by create_finding) per matching rule
def check_misinformation_instructions(prompt)
  rules = [
    [
      /\b(spread|share|promote|tell people|convince others).{0,20}(false|fake|untrue|misleading)\b/i,
      "Prompt appears to instruct spreading of false information",
      :error
    ],
    [
      /\b(ignore|disregard|dismiss).{0,20}(facts|evidence|science|experts)\b/i,
      "Prompt encourages ignoring factual evidence",
      :warn
    ],
    [
      /\b(make up|fabricate|invent|create fake).{0,20}(facts|statistics|studies|evidence)\b/i,
      "Prompt requests fabrication of false evidence",
      :error
    ]
  ]

  rules.each_with_object([]) do |(regex, message, level), findings|
    hits = prompt.scan(regex)
    next if hits.empty?

    findings << create_finding(
      id: "A2",
      level: level,
      message: message,
      details: {
        pattern_matched: regex.source,
        # scan returns one [verb, object] pair per match; report distinct fragments.
        matches: hits.flatten.uniq
      }
    )
  end
end
78
+
79
# Counts references to well-known conspiracy theories and reports them as a
# single finding.
#
# Severity is :warn for one or two references and :error for three or more.
#
# @param prompt [String] text to inspect
# @return [Array] empty, or a single finding built by create_finding
def check_conspiracy_patterns(prompt)
  conspiracy_patterns = [
    /\b(covid.{0,10}hoax|vaccine.{0,10}dangerous|5g.{0,10}virus)\b/i,
    /\b(flat.{0,5}earth|moon.{0,10}landing.{0,10}fake)\b/i,
    /\b(chemtrails|lizard.{0,5}people|illuminati.{0,10}control)\b/i,
    # The inner alternation is non-capturing on purpose: with a capturing group,
    # String#scan returns [whole, inner] pairs, which leaked `nil` (for
    # "deep state") and stray fragments such as "stolen" into matched_patterns.
    /\b(election.{0,10}(?:stolen|rigged)|deep.{0,5}state)\b/i
  ]

  # One captured string per match; compact guards against any unmatched group.
  hits = conspiracy_patterns.flat_map { |pattern| prompt.scan(pattern).flatten.compact }
  return [] if hits.empty?

  conspiracy_count = hits.length

  [
    create_finding(
      id: "A2",
      level: conspiracy_count >= 3 ? :error : :warn,
      message: "Prompt contains conspiracy theory references (#{conspiracy_count} instances)",
      details: {
        conspiracy_count: conspiracy_count,
        matched_patterns: hits.uniq,
        suggestions: [
          "Consider removing conspiracy theory references",
          "Focus on factual, evidence-based information",
          "Verify claims with reliable sources"
        ]
      }
    )
  ]
end
119
+
120
# Flags medical/health misinformation: miracle-cure claims, anti-vaccine
# statements and "doctors are hiding it" narratives.
#
# Each rule yields at most one finding, however many times it matches.
#
# @param prompt [String] text to inspect
# @return [Array] one finding (built by create_finding) per matching rule
def check_medical_misinformation(prompt)
  rules = [
    [
      /\b(cure|heal|treat).{0,20}(cancer|diabetes|covid|aids).{0,20}(naturally|home remedy|without medicine)\b/i,
      "Prompt contains potential medical misinformation about cures",
      :error
    ],
    [
      /\b(vaccines?.{0,10}(cause|dangerous|harmful|toxic))\b/i,
      "Prompt contains anti-vaccine misinformation",
      :error
    ],
    [
      /\b(doctors?.{0,10}(hiding|concealing).{0,20}(truth|cure))\b/i,
      "Prompt promotes medical conspiracy theories",
      :warn
    ]
  ]

  rules.each_with_object([]) do |(regex, message, level), findings|
    captured = prompt.scan(regex)
    next if captured.empty?

    findings << create_finding(
      id: "A2",
      level: level,
      message: message,
      details: {
        pattern_matched: regex.source,
        # Every capture group of every match is reported, de-duplicated.
        matches: captured.flatten.uniq,
        suggestions: [
          "Remove medical misinformation claims",
          "Consult qualified medical professionals",
          "Use evidence-based medical information"
        ]
      }
    )
  end
end
163
+
164
# Extracts sentences that assert facts ("studies show that ...") and asks the
# client to fact-check each one, reporting low-confidence claims as :info.
#
# Differences from a naive scan, all deliberate:
# * a sentence containing several markers is checked once, not once per marker;
# * a claim at the very end of the prompt is detected even when the final
#   sentence has no terminating punctuation;
# * when more than three distinct claims are present, nothing is fact-checked
#   at all (keeps the number of client calls bounded).
#
# A failing client call never aborts the analysis; that claim is skipped.
#
# @param prompt [String] text to inspect
# @return [Array] one finding per claim whose :confidence is below
#   @fact_check_threshold
def check_factual_claims(prompt)
  claim_markers = [
    /studies show that/i,
    /research proves/i,
    /scientists have found/i,
    /according to experts/i,
    /statistics indicate/i
  ]

  # Whole sentence around each marker; `\z` accepts an unterminated last sentence.
  claims = claim_markers
           .flat_map { |marker| prompt.scan(/[^.!?]*#{marker}[^.!?]*(?:[.!?]|\z)/) }
           .map(&:strip)
           .uniq

  return [] if claims.empty? || claims.length > 3 # Don't fact-check too many claims

  claims.each_with_object([]) do |claim, findings|
    verdict = @client.fact_check(claim)
    next unless verdict[:confidence] < @fact_check_threshold

    findings << create_finding(
      id: "A2",
      level: :info,
      message: "Factual claim could not be verified with high confidence",
      details: {
        claim: claim,
        confidence: verdict[:confidence],
        reason: verdict[:reason],
        suggestions: [
          "Verify the claim with reliable sources",
          "Consider adding source citations",
          "Use more cautious language for unverified claims"
        ]
      }
    )
  rescue StandardError => e
    # Best effort: fact-checking failed, but don't break the analysis.
    # Diagnostics go to stderr so they cannot corrupt report output on stdout.
    $stderr.puts("Debug: Fact-checking failed for claim: #{e.message}") if ENV["DEBUG"]
  end
end
211
+
212
# Built-in keyword list used when the configuration supplies no
# "misinformation_keywords" entry.
#
# @return [Array<String>] a fresh array on every call
def default_misinformation_keywords
  [
    "fake news",
    "hoax",
    "conspiracy",
    "cover-up",
    "they don't want you to know",
    "mainstream media lies",
    "suppressed truth",
    "hidden agenda",
    "false flag"
  ]
end
218
+ end
219
+ end
220
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require_relative "analyzer"
5
+ require_relative "report"
6
+
7
+ module SentinelRb
8
+ # Command Line Interface for SentinelRb
9
+ class CLI < Thor
10
desc "analyze", "Analyze prompt files for antipatterns"
option :glob, type: :string, desc: "Glob pattern for files to analyze"
option :files, type: :array, desc: "Specific files to analyze"
option :config, type: :string, default: ".sentinel.yml", desc: "Configuration file path"
option :format, type: :string, default: "table", desc: "Output format (table, json, detailed)"
option :output, type: :string, desc: "Output file path (default: stdout)"
option :analyzers, type: :array, desc: "Specific analyzers to run (e.g., A1 A2 or A1,A2)"
option :no_summary, type: :boolean, default: false, desc: "Skip summary output"
option :no_color, type: :boolean, default: false, desc: "Disable colored output"
option :quiet, type: :boolean, default: false, desc: "Suppress non-error output"
option :verbose, type: :boolean, default: false, desc: "Enable verbose output"
# Analyzes the selected prompt files, prints or writes the formatted report,
# and terminates the process: exit status 1 when any finding was reported or
# an error occurred, 0 otherwise.
def analyze
  # Load configuration
  config = load_config(options[:config])
  analyzer = SentinelRb::Analyzer.new(config)

  # Determine files to analyze
  files_to_analyze = determine_files(options)
  error_exit("No files found to analyze. Use --glob or --files to specify files.") if files_to_analyze.empty?

  say("Analyzing #{files_to_analyze.length} files...") unless options[:quiet]

  # Thor splits array options on whitespace only, so `--analyzers A1,A2`
  # arrives as ["A1,A2"]. Accept the comma form advertised in the help text.
  # Stays nil when the option was not given.
  analyzer_ids = options[:analyzers]
                 &.flat_map { |id| id.split(",") }
                 &.map(&:strip)
                 &.reject(&:empty?)

  # Run analysis
  results = files_to_analyze.map do |file|
    say("  Analyzing #{file}...") if options[:verbose]
    analyzer.analyze_file(file, analyzer_ids: analyzer_ids)
  end

  # Format and output results
  formatted_output = SentinelRb::Report::Formatter.format(
    results,
    format: options[:format],
    show_summary: !options[:no_summary],
    colorize: !options[:no_color] && $stdout.tty?
  )

  output_results(formatted_output, options[:output])

  # Exit with appropriate code
  summary = analyzer.summarize_results(results)
  exit_code = summary[:total_findings].positive? ? 1 : 0
  exit(exit_code)
rescue StandardError => e
  # SystemExit is not a StandardError, so the exit calls above pass through.
  error_exit("Analysis failed: #{e.message}")
end
55
+
56
desc "version", "Show SentinelRb version"
# Prints the gem version.
def version
  banner = "SentinelRb #{SentinelRb::VERSION}"
  say(banner)
end
60
+
61
desc "config", "Show current configuration"
option :config, type: :string, default: ".sentinel.yml", desc: "Configuration file path"
# Prints the configuration path followed by one "key: value" line per entry
# of the loaded configuration's #to_h.
def config
  settings = load_config(options[:config])

  say("Configuration loaded from: #{options[:config]}")
  say("")

  settings.to_h.each { |key, value| say("#{key}: #{value}") }
end
72
+
73
desc "test_connection", "Test connection to LLM provider"
option :config, type: :string, default: ".sentinel.yml", desc: "Configuration file path"
# Sends one fixed sample prompt through the configured client and reports
# whether a relevance score came back. Exits with status 1 on failure.
def test_connection
  config = load_config(options[:config])
  client = SentinelRb::Client::Factory.create(config)

  say("Testing connection to #{config.provider}...")

  begin
    # Test with a simple analysis
    result = client.analyze_content("This is a test prompt for connection verification.")
    score = result[:relevance_score]

    # error_exit raises SystemExit, which the rescue below does not intercept.
    error_exit("❌ Connection failed: No response received") unless score

    say("✅ Connection successful!")
    say("Test analysis score: #{score.round(3)}")
  rescue StandardError => e
    error_exit("❌ Connection failed: #{e.message}")
  end
end
95
+
96
+ no_commands do
97
+ private
98
+
99
# Loads the configuration via SentinelRb::Config.load, warning first (unless
# --quiet) when the file does not exist. Any StandardError raised while
# loading terminates the process through error_exit.
#
# @param config_path [String] path of the configuration file
# @return [Object] whatever SentinelRb::Config.load returns
def load_config(config_path)
  missing = !File.exist?(config_path)
  say("Configuration file #{config_path} not found, using defaults.", :yellow) if missing && !options[:quiet]

  SentinelRb::Config.load(config_path)
rescue StandardError => e
  error_exit("Failed to load configuration: #{e.message}")
end
108
+
109
# Resolves the list of files to analyze.
#
# Explicit selections (--files and/or --glob) are honoured as given. The
# built-in default patterns are consulted only when the user specified
# neither option; an explicit glob that matches nothing yields an empty list
# instead of silently analyzing unrelated default files.
#
# @param options [Hash] CLI options; :files (Array<String>) and :glob (String) are read
# @return [Array<String>] unique paths that exist as regular files
def determine_files(options)
  files = []

  files.concat(options[:files]) if options[:files]
  files.concat(Dir.glob(options[:glob])) if options[:glob]

  # Default glob only if no files were specified at all
  unless options[:files] || options[:glob]
    default_patterns = [
      "prompts/**/*.{md,txt,json}",
      "**/*.prompt",
      "**/*.prompt.md"
    ]

    # Use the first default pattern that matches anything; ignore the rest.
    default_patterns.each do |pattern|
      found_files = Dir.glob(pattern)
      next if found_files.empty?

      files.concat(found_files)
      break
    end
  end

  files.uniq.select { |f| File.file?(f) }
end
135
+
136
# Emits the formatted report: to the terminal when no output file is given,
# otherwise to that file, followed by a confirmation unless --quiet.
#
# @param content [String] formatted report
# @param output_file [String, nil] destination path, or nil for stdout
def output_results(content, output_file)
  return say(content) unless output_file

  File.write(output_file, content)
  say("Results written to #{output_file}") unless options[:quiet]
end
144
+
145
# Prints +message+ in red with an "Error: " prefix and terminates the process
# with exit status 1.
#
# @param message [String] description of the failure
def error_exit(message)
  text = "Error: #{message}"
  say(text, :red)
  exit(1)
end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SentinelRb
4
+ module Client
5
# Base class for LLM client implementations.
#
# Defines the three operations every client must provide. Each one raises
# NotImplementedError here. Note that NotImplementedError descends from
# ScriptError, not StandardError, so a plain `rescue` / `rescue StandardError`
# in calling code will not intercept it.
class Base
  # @param config [Object] client configuration, stored as @config
  def initialize(config)
    @config = config
  end

  # Abstract method: Calculate semantic similarity between two texts
  # @param text1 [String] First text
  # @param text2 [String] Second text
  # @return [Float] Similarity score between 0.0 and 1.0
  # @raise [NotImplementedError] always, unless overridden
  def similarity(text1, text2)
    abstract_method!(:similarity)
  end

  # Abstract method: Check factual accuracy of a statement
  # @param statement [String] Statement to fact-check
  # @return [Hash] Result with :accurate boolean, :confidence score and a
  #   :reason string
  # @raise [NotImplementedError] always, unless overridden
  def fact_check(statement)
    abstract_method!(:fact_check)
  end

  # Abstract method: Analyze content for relevance and quality
  # @param prompt [String] Prompt text to analyze
  # @return [Hash] Analysis results
  # @raise [NotImplementedError] always, unless overridden
  def analyze_content(prompt)
    abstract_method!(:analyze_content)
  end

  private

  # Raises the uniform "not implemented" error for abstract method +name+.
  def abstract_method!(name)
    raise NotImplementedError, "Subclasses must implement ##{name}"
  end
end
33
+ end
34
+ end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+
5
+ module SentinelRb
6
+ module Client
7
+ # Enhanced mock client with improved detection for testing
8
+ class Mock < Base
9
# Creates the mock client.
#
# @param config [#[]] configuration; forwarded to the parent initializer, and
#   its "mock_scores" entry (fallback: empty hash) is kept as @relevance_scores
def initialize(config)
  super

  configured_scores = config["mock_scores"]
  # NOTE(review): nothing visible in this class reads @relevance_scores.
  @relevance_scores = configured_scores || {}
end
13
+
14
# Jaccard similarity over the distinct word sets of two texts:
# |shared words| / |all distinct words|.
#
# @param text1 [String] First text
# @param text2 [String] Second text
# @return [Float] 0.0 when either text has no words, otherwise a ratio in 0.0..1.0
def similarity(text1, text2)
  vocabulary_a = extract_words(text1).uniq
  vocabulary_b = extract_words(text2).uniq

  return 0.0 if vocabulary_a.empty? || vocabulary_b.empty?

  shared = (vocabulary_a & vocabulary_b).size
  # Size of the union by inclusion-exclusion.
  combined = vocabulary_a.size + vocabulary_b.size - shared

  shared.to_f / combined
end
26
+
27
# Mock analysis: scores the prompt with the local heuristic instead of
# calling a model.
#
# @param prompt [String] Prompt text to analyze
# @return [Hash] :relevance_score (Float) and :raw_response (the score as a String)
def analyze_content(prompt)
  relevance = calculate_mock_relevance(prompt)

  { relevance_score: relevance, raw_response: relevance.to_s }
end
36
+
37
# Mock fact-check: performs no verification and returns the same fixed
# result for every statement.
#
# @param _statement [String] ignored
# @return [Hash] :accurate => true, :confidence => 0.8, plus a :reason string
def fact_check(_statement)
  verdict = {}
  verdict[:accurate] = true
  verdict[:confidence] = 0.8
  verdict[:reason] = "Mock fact-check: No real verification performed"
  verdict
end
45
+
46
+ private
47
+
48
# Splits text into lower-cased tokens: maximal runs of ASCII word characters
# and/or Hiragana, Katakana and Han characters.
#
# @param text [String] input text
# @return [Array<String>] tokens in order of appearance (duplicates kept)
def extract_words(text)
  token = /[\w\p{Hiragana}\p{Katakana}\p{Han}]+/
  normalized = text.downcase
  normalized.scan(token)
end
52
+
53
# Heuristic relevance score for the mock client.
#
# Starts at 0.75 and subtracts penalties for marketing language, noise
# markers, repeated words, heavy capitalization and fragmented sentences,
# then clamps the result to 0.0..1.0.
#
# Notes on the patterns:
# * Japanese terms are matched without `\b`: Japanese text is not
#   space-delimited, so a word-boundary anchor never matches inside a sentence.
# * Full-width punctuation is written as \uXXXX escapes so that it survives
#   tools that normalize it to ASCII (which turns `？{2,}` into the invalid
#   regex `?{2,}`).
#
# With ENV["DEBUG"] set, each penalty is traced on stderr (not stdout, so
# report output stays clean).
#
# @param prompt [String] prompt text to score
# @return [Float] score between 0.0 and 1.0
def calculate_mock_relevance(prompt)
  debug = ENV["DEBUG"]
  score = 0.75 # Start with medium-high base score

  marketing_patterns = [
    # English marketing terms
    /\b(sale|discount|offer|buy now|limited time|special|exclusive|amazing|incredible)\b/i,
    /\b(deal|bargain|cheap|free|bonus|gift|prize|winner|congratulations)\b/i,
    /\b(urgent|hurry|act now|don't miss|last chance|final call)\b/i,
    # Japanese marketing terms (no word boundaries, see above)
    /セール|割引|特別|お得|今すぐ|限定|無料|プレゼント|キャンペーン/,
    /急げ|お急ぎ|見逃すな|最後|チャンス|特価|激安/,
    # Punctuation patterns (ASCII and full-width)
    /[!\uFF01]{2,}/, # Multiple exclamation marks
    /[?\uFF1F]{2,}/, # Multiple question marks
    /[★☆]{2,}/ # Multiple stars
  ]

  marketing_count = 0
  marketing_patterns.each do |pattern|
    hits = prompt.scan(pattern).length
    next unless hits.positive?

    marketing_count += hits
    penalty = hits * 0.15
    score -= penalty
    $stderr.puts("Debug: Marketing pattern found: #{pattern.source} (#{hits} matches, -#{penalty})") if debug
  end

  noise_patterns = [
    /\b(todo|fixme|disclaimer|note|warning)\b/i,
    /注意|注記/,
    /\b(legal notice|copyright|terms|conditions)\b/i,
    /利用規約|法的免責|免責事項/,
    /\b(placeholder|example|sample|template)\b/i,
    /テンプレート|例/,
    /※.*/, # Japanese note markers
    /\[.*\]/, # Bracketed content (often metadata)
    /\{.*\}/ # Braced content (often placeholders)
  ]

  noise_count = 0
  noise_patterns.each do |pattern|
    hits = prompt.scan(pattern).length
    next unless hits.positive?

    noise_count += hits
    penalty = hits * 0.2 # Significant penalty for noise
    score -= penalty
    $stderr.puts("Debug: Noise pattern found: #{pattern.source} (#{hits} matches, -#{penalty})") if debug
  end

  # Repetition: only evaluated for prompts with more than five tokens.
  words = extract_words(prompt)
  if words.length > 5
    word_counts = words.each_with_object(Hash.new(0)) { |word, counts| counts[word] += 1 }
    repetitive_words = word_counts.select { |word, count| count >= 2 && word.length > 1 }

    if repetitive_words.any?
      # Occurrences beyond the first, as a share of all tokens.
      total_repetitions = repetitive_words.values.sum - repetitive_words.length
      repetition_ratio = total_repetitions.to_f / words.length
      penalty = repetition_ratio * 0.5 # Up to 50% penalty for heavy repetition
      score -= penalty
      if debug
        $stderr.puts(
          "Debug: Repetitive words found: #{repetitive_words.keys} " \
          "(ratio: #{repetition_ratio.round(3)}, -#{penalty.round(3)})"
        )
      end
    end
  end

  # Excessive capitalization (shouting); an empty prompt has no ratio to compute.
  caps_ratio = prompt.empty? ? 0.0 : prompt.count("A-Z").to_f / prompt.length
  if caps_ratio > 0.3 # More than 30% caps
    caps_penalty = (caps_ratio - 0.3) * 0.4
    score -= caps_penalty
    if debug
      $stderr.puts(
        "Debug: Excessive capitalization found: #{(caps_ratio * 100).round(1)}% (-#{caps_penalty.round(3)})"
      )
    end
  end

  # Fragmented content: many sentences shorter than three words.
  # Terminators: ASCII . ! ? plus ideographic full stop and full-width ! ?
  sentences = prompt.split(/[.!?。\uFF01\uFF1F]/).reject(&:empty?)
  if sentences.length > 3
    short_sentences = sentences.select { |sentence| sentence.strip.split.length < 3 }
    if short_sentences.length > sentences.length * 0.4 # More than 40% short sentences
      fragmentation_penalty = 0.15
      score -= fragmentation_penalty
      if debug
        $stderr.puts(
          "Debug: Fragmented content detected: #{short_sentences.length}/#{sentences.length} " \
          "short sentences (-#{fragmentation_penalty})"
        )
      end
    end
  end

  # Additional penalty once marketing and noise hits add up to five or more.
  total_issues = marketing_count + noise_count
  if total_issues >= 5
    cumulative_penalty = (total_issues - 4) * 0.05
    score -= cumulative_penalty
    $stderr.puts("Debug: Cumulative penalty for #{total_issues} issues: -#{cumulative_penalty}") if debug
  end

  # Ensure score is within bounds
  final_score = score.clamp(0.0, 1.0)
  if debug
    $stderr.puts("Debug: Final relevance score: #{final_score} (marketing: #{marketing_count}, noise: #{noise_count})")
  end
  final_score
end
165
+ end
166
+ end
167
+ end