rubocop-prompt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ # RuboCop Integration
2
+
3
+ ## Extension Architecture
4
+ This gem extends RuboCop functionality by providing prompt-based features for enhanced code analysis.
5
+
6
+ ## Key Components
7
+ - **Prompt Module**: Core functionality for prompt-based interactions
8
+ - **RuboCop Integration**: Seamless integration with RuboCop's analyzer
9
+ - **Configuration**: Customizable settings for prompt behavior
10
+
11
+ ## Implementation Notes
12
+ - Follow RuboCop's plugin architecture
13
+ - Extend existing RuboCop classes where appropriate
14
+ - Maintain compatibility with different RuboCop versions
15
+ - Provide clear error messages and user feedback
16
+
17
+ ## Configuration
18
+ - Support YAML-based configuration
19
+ - Allow customization of prompt behavior
20
+ - Integrate with existing `.rubocop.yml` files
21
+
22
+ ## Testing RuboCop Extensions
23
+ - Test against multiple RuboCop versions
24
+ - Include integration tests with real Ruby code
25
+ - Mock RuboCop internals where necessary
26
+ - Test configuration loading and validation
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubocop"
4
+
5
module RuboCop
  module Cop
    module Prompt
      # Checks that labeled sections (`### Text`) sit near the beginning or the
      # end of a prompt string, not in its middle.
      #
      # Only strings located inside a class, module, or method whose name
      # contains "prompt" (case-insensitive) are inspected. Within that context
      # the cop looks at the value of `system:` hash pairs and at every other
      # string literal or heredoc.
      #
      # Blank lines are ignored when counting. Content with six or fewer
      # non-blank lines is never flagged because it has no meaningful middle.
      # For longer content a `###` line is an offense when it lies outside both
      # the first third and the last third of the lines (thirds are rounded up).
      # Interpolated parts of a string are not part of the inspected text.
      #
      # @example
      #   # bad - the label is the 4th of 7 non-blank lines
      #   system: <<~PROMPT
      #     # System Instructions
      #     You are an AI assistant.
      #     Answer in plain English.
      #     ### Important Note
      #     Please follow these guidelines.
      #     Keep answers short.
      #     More instructions here.
      #   PROMPT
      #
      #   # good - the label opens the prompt
      #   system: <<~PROMPT
      #     ### Important Note
      #     Please follow these guidelines.
      #     # System Instructions
      #     You are an AI assistant.
      #     Answer in plain English.
      #     Keep answers short.
      #     More instructions here.
      #   PROMPT
      #
      #   # good - short content is never flagged
      #   system: <<~PROMPT
      #     # System Instructions
      #     ### Important Note
      #     Please follow these guidelines.
      #   PROMPT
      class CriticalFirstLast < RuboCop::Cop::Base
        MSG = "Labeled sections (### text) should appear at the beginning or end, not in the middle"

        # Content with this many non-blank lines (or fewer) is never flagged.
        MAX_UNCHECKED_LINES = 6

        # A labeled section line: "###", whitespace, then some text.
        # Applied to single, already stripped lines only.
        LABELED_SECTION = /\A###\s+.+/

        # Inspects the value of a `system:` pair inside a prompt context.
        def on_pair(node)
          return unless system_pair?(node)
          return unless in_prompt_context?(node)

          check_labeled_sections(node, extract_content(node.children[1]))
        end

        # Inspects a plain string literal inside a prompt context.
        def on_str(node)
          check_string_literal(node)
        end

        # Inspects an interpolated string or heredoc inside a prompt context.
        def on_dstr(node)
          check_string_literal(node)
        end

        private

        # Shared body of on_str / on_dstr. Strings nested in a `system:` pair
        # are skipped here because on_pair is responsible for those.
        def check_string_literal(node)
          return unless in_prompt_context?(node)
          return if node.each_ancestor(:pair).any? { |ancestor| system_pair?(ancestor) }

          check_labeled_sections(node, extract_content(node))
        end

        # @return [Boolean] true when node is a pair whose key is the symbol :system
        def system_pair?(node)
          return false unless node.type == :pair

          key = node.children[0]
          key.type == :sym && key.children[0] == :system
        end

        # @return [Boolean] true when any enclosing class, module, or method
        #   has "prompt" in its name
        def in_prompt_context?(node)
          node.each_ancestor(:class, :module, :def, :defs).any? { |ancestor| prompt_named?(ancestor) }
        end

        # @return [Boolean] true when the definition's own name contains
        #   "prompt", compared case-insensitively
        def prompt_named?(node)
          name =
            case node.type
            when :class, :module
              identifier = node.children[0]
              identifier.children[1] if identifier.type == :const
            when :def, :defs
              node.method_name
            end

          # nil.to_s is "", so unnamed or unsupported nodes yield false.
          name.to_s.downcase.include?("prompt")
        end

        # Returns the literal text of a string node, or nil for any other node
        # type. For :dstr only the literal :str segments are joined.
        def extract_content(node)
          case node.type
          when :str
            node.children[0]
          when :dstr
            node.children.filter_map { |child| child.children[0] if child.type == :str }.join
          end
        end

        # String#strip and regexp matching raise ArgumentError on strings with
        # invalid byte sequences (e.g. the literal "\xff"), so such content is
        # skipped instead of crashing the cop. nil and blank content are
        # skipped as well.
        def inspectable?(content)
          content.is_a?(String) && content.valid_encoding? && !content.strip.empty?
        end

        # Registers an offense on node when a labeled section lies in the
        # middle part of content.
        def check_labeled_sections(node, content)
          return unless inspectable?(content)

          # Literal backslash-n sequences are treated like real line breaks
          # before splitting; blank lines do not count.
          lines = content.gsub(/\\n/, "\n").split("\n").map(&:strip).reject(&:empty?)
          return if lines.size <= MAX_UNCHECKED_LINES

          # Integer form of (lines.size / 3.0).ceil
          third = (lines.size + 2) / 3
          middle = third...(lines.size - third)
          return unless middle.any? { |index| lines[index].match?(LABELED_SECTION) }

          add_offense(node)
        end
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubocop"
4
+
5
module RuboCop
  module Cop
    module Prompt
      # Checks that `system:` values start with a Markdown heading.
      #
      # Only `system:` pairs located inside a class, module, or method whose
      # name contains "prompt" (case-insensitive) are inspected. The first
      # non-blank line of the literal text must be a heading: one to six `#`
      # characters, whitespace, then text. Interpolated parts of a string are
      # not part of the inspected text, and values that are not string
      # literals or heredocs are ignored.
      #
      # @example
      #   # bad
      #   system: <<~PROMPT
      #     You are an AI assistant.
      #   PROMPT
      #
      #   # good
      #   system: <<~PROMPT
      #     # System Instructions
      #     You are an AI assistant.
      #   PROMPT
      #
      #   # good - deeper heading levels are headings too
      #   system: <<~PROMPT
      #     ### Important Note
      #     You are an AI assistant.
      #   PROMPT
      class InvalidFormat < RuboCop::Cop::Base
        MSG = "system: block should start with a Markdown heading (# text)"

        # One to six "#", whitespace, then text. `[#]` is used instead of a
        # bare `#` so the following `{` is not read as string interpolation.
        HEADING = /\A[#]{1,6}\s+.+/

        # Inspects the value of a `system:` pair inside a prompt context.
        def on_pair(node)
          return unless system_pair?(node)
          return unless in_prompt_context?(node)

          content = extract_content(node.children[1])
          return unless inspectable?(content)
          return if starts_with_markdown_heading?(content)

          add_offense(node)
        end

        private

        # @return [Boolean] true when node is a pair whose key is the symbol :system
        def system_pair?(node)
          return false unless node.type == :pair

          key = node.children[0]
          key.type == :sym && key.children[0] == :system
        end

        # @return [Boolean] true when any enclosing class, module, or method
        #   has "prompt" in its name
        def in_prompt_context?(node)
          node.each_ancestor(:class, :module, :def, :defs).any? { |ancestor| prompt_named?(ancestor) }
        end

        # @return [Boolean] true when the definition's own name contains
        #   "prompt", compared case-insensitively
        def prompt_named?(node)
          name =
            case node.type
            when :class, :module
              identifier = node.children[0]
              identifier.children[1] if identifier.type == :const
            when :def, :defs
              node.method_name
            end

          # nil.to_s is "", so unnamed or unsupported nodes yield false.
          name.to_s.downcase.include?("prompt")
        end

        # Returns the literal text of a string node, or nil for any other node
        # type. For :dstr only the literal :str segments are joined.
        def extract_content(node)
          case node.type
          when :str
            node.children[0]
          when :dstr
            node.children.filter_map { |child| child.children[0] if child.type == :str }.join
          end
        end

        # String#strip and regexp matching raise ArgumentError on strings with
        # invalid byte sequences (e.g. the literal "\xff"), so such content is
        # skipped instead of crashing the cop. nil and blank content are
        # skipped as well.
        def inspectable?(content)
          content.is_a?(String) && content.valid_encoding? && !content.strip.empty?
        end

        # @param content [String] non-blank, validly encoded text
        # @return [Boolean] true when the first non-blank line is a heading
        def starts_with_markdown_heading?(content)
          # Literal backslash-n sequences are treated like real line breaks.
          first_line = content.gsub("\\n", "\n").strip.lines.first.to_s.strip
          first_line.match?(HEADING)
        end
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubocop"
4
+ require "tiktoken_ruby"
5
+
6
module RuboCop
  module Cop
    module Prompt
      # Checks that string content in prompt-related code does not exceed a
      # maximum token count.
      #
      # Only strings located inside a class, module, or method whose name
      # contains "prompt" (case-insensitive) are inspected. Tokens are counted
      # with tiktoken_ruby using the cl100k_base encoding; if that fails, the
      # count is approximated as one token per four characters. The limit is
      # read from the cop's `MaxTokens` configuration key and defaults to 4000.
      # Interpolated parts of a string are not counted.
      #
      # @example
      #   # bad (assuming very long content that exceeds the token limit)
      #   def generate_prompt
      #     <<~PROMPT
      #       # This is a very long prompt that contains thousands of tokens...
      #       # [many lines of text]
      #     PROMPT
      #   end
      #
      #   # good
      #   def generate_prompt
      #     <<~PROMPT
      #       # A concise prompt that stays within token limits
      #       You are a helpful assistant.
      #     PROMPT
      #   end
      class MaxTokens < RuboCop::Cop::Base
        MSG = "Documentation text exceeds maximum token limit (%<actual>d > %<max>d tokens)"

        # Default maximum token count
        DEFAULT_MAX_TOKENS = 4000

        # tiktoken encoding used for counting
        ENCODING_NAME = "cl100k_base"

        # Rough approximation used when tiktoken is unavailable:
        # 1 token is about 4 characters of English text.
        FALLBACK_CHARS_PER_TOKEN = 4

        # Inspects a plain string literal inside a prompt context.
        def on_str(node)
          return unless in_prompt_context?(node)

          check_token_count(node, node.children[0])
        end

        # Inspects the literal segments of an interpolated string or heredoc
        # inside a prompt context.
        def on_dstr(node)
          return unless in_prompt_context?(node)

          literal_text = node.children.filter_map { |child| child.children[0] if child.type == :str }.join
          check_token_count(node, literal_text)
        end

        private

        # Registers an offense on node when content has more tokens than allowed.
        def check_token_count(node, content)
          return unless inspectable?(content)

          token_count = calculate_tokens(content)
          max_tokens = cop_config["MaxTokens"] || DEFAULT_MAX_TOKENS
          return unless token_count > max_tokens

          add_offense(node, message: format(MSG, actual: token_count, max: max_tokens))
        end

        # String#strip raises ArgumentError on strings with invalid byte
        # sequences (e.g. the literal "\xff"), so such content is skipped
        # instead of crashing the cop. nil and blank content are skipped too.
        def inspectable?(content)
          content.is_a?(String) && content.valid_encoding? && !content.strip.empty?
        end

        # @return [Integer] token count from tiktoken_ruby, or the character
        #   approximation when tiktoken_ruby raises
        def calculate_tokens(content)
          Tiktoken.get_encoding(ENCODING_NAME).encode(content).length
        rescue StandardError => e
          warn_about_fallback(e)
          content.length / FALLBACK_CHARS_PER_TOKEN
        end

        # Emits the fallback warning at most once per cop instance so a broken
        # tiktoken setup does not print one warning per inspected string.
        def warn_about_fallback(error)
          return if @fallback_warned

          @fallback_warned = true
          warn "Failed to calculate tokens with tiktoken_ruby: #{error.message}. Using character approximation."
        end

        # @return [Boolean] true when any enclosing class, module, or method
        #   has "prompt" in its name
        def in_prompt_context?(node)
          node.each_ancestor(:class, :module, :def, :defs).any? { |ancestor| prompt_named?(ancestor) }
        end

        # @return [Boolean] true when the definition's own name contains
        #   "prompt", compared case-insensitively
        def prompt_named?(node)
          name =
            case node.type
            when :class, :module
              identifier = node.children[0]
              identifier.children[1] if identifier.type == :const
            when :def, :defs
              node.method_name
            end

          # nil.to_s is "", so unnamed or unsupported nodes yield false.
          name.to_s.downcase.include?("prompt")
        end
      end
    end
  end
end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubocop"
4
+
5
module RuboCop
  module Cop
    module Prompt
      # Checks for missing stop tokens or token limits in
      # `OpenAI::Client.new.chat` calls.
      #
      # The cop inspects `chat` calls whose receiver is written literally as
      # `OpenAI::Client.new` (or `::OpenAI::Client.new`, with or without
      # arguments) and whose `parameters:` argument is a hash literal. Such a
      # call must set `stop`, `max_tokens`, or `max_completion_tokens` to
      # prevent runaway generation.
      #
      # To avoid false positives the cop stays silent when it cannot decide:
      # calls on any other receiver (for example a local variable `client`),
      # `parameters:` values that are not hash literals, and hash literals that
      # merge another hash with `**` are not reported.
      #
      # @example
      #   # bad
      #   OpenAI::Client.new.chat(
      #     parameters: {
      #       model: "gpt-4",
      #       messages: [{ role: "user", content: "Hello" }]
      #     }
      #   )
      #
      #   # good
      #   OpenAI::Client.new.chat(
      #     parameters: {
      #       model: "gpt-4",
      #       messages: [{ role: "user", content: "Hello" }],
      #       max_tokens: 100
      #     }
      #   )
      #
      #   # good
      #   OpenAI::Client.new.chat(
      #     parameters: {
      #       model: "gpt-4",
      #       messages: messages,
      #       stop: ["END", "\n"]
      #     }
      #   )
      #
      #   # not inspected - the receiver is not a literal OpenAI::Client.new
      #   client.chat(
      #     parameters: {
      #       model: "gpt-4",
      #       messages: messages
      #     }
      #   )
      class MissingStop < RuboCop::Cop::Base
        MSG = "OpenAI::Client.chat call should include 'stop:' or 'max_tokens:' parameter to prevent runaway generation"

        # Parameter names that bound the length of a completion.
        LIMITING_KEYS = %w[stop max_tokens max_completion_tokens].freeze

        # Reports `OpenAI::Client.new.chat` calls whose literal parameters
        # hash sets none of the LIMITING_KEYS.
        def on_send(node)
          return unless openai_chat_call?(node)

          parameters = extract_parameters_hash(node)
          return unless parameters
          return if limits_generation?(parameters)

          add_offense(node)
        end

        private

        # @return [Boolean] true for `<OpenAI::Client>.new(...).chat(...)`
        def openai_chat_call?(node)
          return false unless node.method_name == :chat

          receiver = node.receiver
          return false unless receiver&.type == :send && receiver.method_name == :new

          openai_client_const?(receiver.receiver)
        end

        # Matches the constant `OpenAI::Client`, i.e.
        # s(:const, s(:const, nil, :OpenAI), :Client), and its top-level form
        # `::OpenAI::Client`, where the innermost scope is s(:cbase).
        def openai_client_const?(node)
          return false unless node&.type == :const

          scope, name = node.children
          return false unless name == :Client && scope&.type == :const

          outer_scope, outer_name = scope.children
          outer_name == :OpenAI && (outer_scope.nil? || outer_scope.type == :cbase)
        end

        # Returns the hash literal passed as `parameters:`, or nil when there
        # is no such argument or its value is not a hash literal (e.g. a
        # variable), in which case the call cannot be analyzed.
        def extract_parameters_hash(node)
          node.arguments.each do |argument|
            next unless argument.type == :hash

            parameters = argument.children.find { |entry| parameters_pair?(entry) }
            next unless parameters

            value = parameters.children[1]
            return value.type == :hash ? value : nil
          end

          nil
        end

        # @return [Boolean] true when entry is a pair keyed by the symbol :parameters
        def parameters_pair?(entry)
          return false unless entry.type == :pair

          key = entry.children[0]
          key.type == :sym && key.children[0] == :parameters
        end

        # @return [Boolean] true when the hash sets a limiting key, or merges
        #   another hash whose keys cannot be seen
        def limits_generation?(hash_node)
          return false unless hash_node.type == :hash

          hash_node.children.any? do |entry|
            case entry.type
            when :pair then limiting_key?(entry.children[0])
            when :kwsplat then true # `**defaults` may carry the limit
            else false
            end
          end
        end

        # Accepts both symbol and string keys (`max_tokens:` / `"max_tokens" =>`).
        def limiting_key?(key_node)
          return false unless %i[sym str].include?(key_node.type)

          LIMITING_KEYS.include?(key_node.children[0].to_s)
        end
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rubocop"
4
+
5
module RuboCop
  module Cop
    module Prompt
      # Checks for dynamic interpolation in SYSTEM heredocs.
      #
      # Only heredocs located inside a class, module, or method whose name
      # contains "prompt" (case-insensitive) are inspected. A heredoc is
      # treated as a SYSTEM heredoc when its delimiter starts with `SYSTEM`
      # (`<<SYSTEM`, `<<~SYSTEM`, `<<-SYSTEM`, optionally double-quoted). Any
      # interpolation in such a heredoc, e.g. `#{user_msg}` or the shorthand
      # `#@user_msg`, is reported because interpolating into a system prompt
      # can lead to prompt injection vulnerabilities.
      #
      # @example
      #   # bad
      #   <<~SYSTEM
      #     You are an AI assistant. The user said: #{user_msg}
      #   SYSTEM
      #
      #   # bad
      #   <<~SYSTEM
      #     Process this request: #{params[:input]}
      #   SYSTEM
      #
      #   # good
      #   <<~SYSTEM
      #     You are an AI assistant.
      #   SYSTEM
      #
      #   # good (using separate user message)
      #   system_prompt = <<~SYSTEM
      #     You are an AI assistant.
      #   SYSTEM
      #   user_message = user_msg
      class SystemInjection < RuboCop::Cop::Base
        MSG = "Avoid dynamic interpolation in SYSTEM heredocs to prevent prompt injection vulnerabilities"

        # Heredoc opener whose delimiter starts with SYSTEM. It is matched
        # against the node's own source, which for a heredoc is the opener.
        SYSTEM_OPENER = /\A<<[~-]?"?SYSTEM/

        # Reports SYSTEM heredocs in a prompt context that contain interpolation.
        def on_dstr(node)
          return unless system_heredoc?(node)
          return unless interpolated?(node)
          return unless in_prompt_context?(node)

          add_offense(node)
        end

        private

        # @return [Boolean] true when any enclosing class, module, or method
        #   has "prompt" in its name
        def in_prompt_context?(node)
          node.each_ancestor(:class, :module, :def, :defs).any? { |ancestor| prompt_named?(ancestor) }
        end

        # @return [Boolean] true when the definition's own name contains
        #   "prompt", compared case-insensitively
        def prompt_named?(node)
          name =
            case node.type
            when :class, :module
              identifier = node.children[0]
              identifier.children[1] if identifier.type == :const
            when :def, :defs
              node.method_name
            end

          # nil.to_s is "", so unnamed or unsupported nodes yield false.
          name.to_s.downcase.include?("prompt")
        end

        # Decides from the node itself rather than from the surrounding source
        # line, so an ordinary interpolated string that merely shares a line
        # with a SYSTEM heredoc opener is not mistaken for the heredoc.
        def system_heredoc?(node)
          node.type == :dstr && node.heredoc? && node.source.match?(SYSTEM_OPENER)
        end

        # @return [Boolean] true when the string has any non-literal segment.
        #   Besides `#{...}` (:begin) this covers the shorthand forms `#@ivar`,
        #   `#@@cvar`, and `#$gvar`, whose segments are not :begin nodes.
        def interpolated?(node)
          node.children.any? do |child|
            case child.type
            when :str then false
            when :dstr then interpolated?(child)
            else true
            end
          end
        end
      end
    end
  end
end