RubyGems - rubocop-prompt - Versions diffs - 0.1.0 - Mend

rubocop-prompt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +18 -0
data/AGENTS.md +37 -0
data/CHANGELOG.md +23 -0
data/CODE_OF_CONDUCT.md +132 -0
data/LICENSE.txt +21 -0
data/README.md +604 -0
data/Rakefile +12 -0
data/config/default.yml +31 -0
data/docs/development-guidelines.md +31 -0
data/docs/project-overview.md +32 -0
data/docs/rubocop-integration.md +26 -0
data/lib/rubocop/cop/prompt/critical_first_last.rb +146 -0
data/lib/rubocop/cop/prompt/invalid_format.rb +95 -0
data/lib/rubocop/cop/prompt/max_tokens.rb +111 -0
data/lib/rubocop/cop/prompt/missing_stop.rb +145 -0
data/lib/rubocop/cop/prompt/system_injection.rb +99 -0
data/lib/rubocop/cop/prompt/temperature_range.rb +235 -0
data/lib/rubocop/prompt/plugin.rb +31 -0
data/lib/rubocop/prompt/version.rb +7 -0
data/lib/rubocop/prompt.rb +16 -0
data/sig/rubocop/prompt.rbs +6 -0
metadata +138 -0

data/docs/rubocop-integration.md ADDED Viewed

@@ -0,0 +1,26 @@
+# RuboCop Integration
+## Extension Architecture
+This gem extends RuboCop with cops that analyze LLM prompt text embedded in Ruby code, covering prompt structure, token limits, and prompt-injection risks.
+## Key Components
+- **Prompt Module**: Core functionality for prompt-based interactions
+- **RuboCop Integration**: Seamless integration with RuboCop's analyzer
+- **Configuration**: Customizable settings for prompt behavior
+## Implementation Notes
+- Follow RuboCop's plugin architecture
+- Extend existing RuboCop classes where appropriate
+- Maintain compatibility with different RuboCop versions
+- Provide clear error messages and user feedback
+## Configuration
+- Support YAML-based configuration
+- Allow customization of prompt behavior
+- Integrate with existing `.rubocop.yml` files
+## Testing RuboCop Extensions
+- Test against multiple RuboCop versions
+- Include integration tests with real Ruby code
+- Mock RuboCop internals where necessary
+- Test configuration loading and validation

data/lib/rubocop/cop/prompt/critical_first_last.rb ADDED Viewed

@@ -0,0 +1,146 @@
+# frozen_string_literal: true
+require "rubocop"
+module RuboCop
+  module Cop
+    module Prompt
+      # Checks that labeled sections (### Text) appear at the beginning or end of files,
+      # not in the middle.
+      #
+      # This cop identifies code in classes, modules, or methods with "prompt" in their names
+      # and ensures that any labeled sections (lines starting with ###) are positioned
+      # at the beginning or end of the content, not in the middle sections.
+      #
+      # @example
+      #   # bad
+      #   system: <<~PROMPT
+      #     # System Instructions
+      #     You are an AI assistant.
+      #     ### Important Note
+      #     Please follow these guidelines.
+      #     More instructions here.
+      #   PROMPT
+      #
+      #   # good
+      #   system: <<~PROMPT
+      #     ### Important Note
+      #     Please follow these guidelines.
+      #     # System Instructions
+      #     You are an AI assistant.
+      #   PROMPT
+      class CriticalFirstLast < RuboCop::Cop::Base
+        MSG = "Labeled sections (### text) should appear at the beginning or end, not in the middle"
+        def on_pair(node)
+          return unless system_pair?(node)
+          return unless in_prompt_context?(node)
+          value_node = node.children[1]
+          content = extract_content(value_node)
+          return if content.nil? || content.strip.empty?
+          check_labeled_sections(node, content)
+        end
+        def on_str(node)
+          return unless in_prompt_context?(node)
+          return if node.each_ancestor(:pair).any? { |ancestor| system_pair?(ancestor) }
+          content = node.children[0]
+          return if content.nil? || content.strip.empty?
+          check_labeled_sections(node, content)
+        end
+        def on_dstr(node)
+          return unless in_prompt_context?(node)
+          return if node.each_ancestor(:pair).any? { |ancestor| system_pair?(ancestor) }
+          content = extract_content(node)
+          return if content.nil? || content.strip.empty?
+          check_labeled_sections(node, content)
+        end
+        private
+        def system_pair?(node)
+          return false unless node.type == :pair
+          key_node = node.children[0]
+          key_node.type == :sym && key_node.children[0] == :system
+        end
+        def in_prompt_context?(node)
+          # Check if we're inside a class, module, or method that contains "prompt"
+          node.each_ancestor(:class, :module, :def, :defs) do |ancestor|
+            return true if has_prompt_in_name?(ancestor)
+          end
+          false
+        end
+        def has_prompt_in_name?(node)
+          case node.type
+          when :class, :module
+            name_node = node.children[0]
+            if name_node.type == :const
+              name_node.children[1].to_s.downcase.include?("prompt")
+            else
+              false
+            end
+          when :def, :defs
+            node.method_name.to_s.downcase.include?("prompt")
+          else
+            false
+          end
+        end
+        def extract_content(node)
+          case node.type
+          when :str
+            node.children[0]
+          when :dstr
+            # Handle heredoc content
+            node.children.filter_map do |child|
+              child.children[0] if child.type == :str
+            end.join
+          end
+        end
+        def check_labeled_sections(node, content)
+          # Normalize line endings and split into lines
+          normalized_content = content.gsub(/\\n/, "\n")
+          lines = normalized_content.split("\n").map(&:strip).reject(&:empty?)
+          # Find all lines that start with ###
+          labeled_sections = []
+          lines.each_with_index do |line, index|
+            if line.match?(/^###\s+.+/)
+              labeled_sections << index
+            end
+          end
+          return if labeled_sections.empty?
+          # Check if any labeled sections are in the middle
+          total_lines = lines.size
+          return if total_lines <= 6 # Need at least 7 lines to have meaningful middle
+          # Define first third and last third boundaries
+          first_third = [(total_lines / 3.0).ceil, 2].max
+          last_third = total_lines - [(total_lines / 3.0).ceil, 2].max
+          middle_sections = labeled_sections.select do |line_index|
+            line_index >= first_third && line_index < last_third
+          end
+          return if middle_sections.empty?
+          add_offense(node)
+        end
+      end
+    end
+  end
+end

data/lib/rubocop/cop/prompt/invalid_format.rb ADDED Viewed

@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+require "rubocop"
+module RuboCop
+  module Cop
+    module Prompt
+      # Checks that system: blocks start with a Markdown heading.
+      #
+      # This cop identifies code in classes, modules, or methods with "prompt" in their names
+      # and ensures that any system: blocks begin with a Markdown heading (# text).
+      #
+      # @example
+      #   # bad
+      #   system: <<~PROMPT
+      #     You are an AI assistant.
+      #   PROMPT
+      #
+      #   # good
+      #   system: <<~PROMPT
+      #     # System Instructions
+      #     You are an AI assistant.
+      #   PROMPT
+      class InvalidFormat < RuboCop::Cop::Base
+        MSG = "system: block should start with a Markdown heading (# text)"
+        def on_pair(node)
+          return unless system_pair?(node)
+          return unless in_prompt_context?(node)
+          value_node = node.children[1]
+          content = extract_content(value_node)
+          return if content.nil? || content.strip.empty?
+          return if starts_with_markdown_heading?(content)
+          add_offense(node)
+        end
+        private
+        def system_pair?(node)
+          return false unless node.type == :pair
+          key_node = node.children[0]
+          key_node.type == :sym && key_node.children[0] == :system
+        end
+        def in_prompt_context?(node)
+          # Check if we're inside a class, module, or method that contains "prompt"
+          node.each_ancestor(:class, :module, :def, :defs) do |ancestor|
+            return true if has_prompt_in_name?(ancestor)
+          end
+          false
+        end
+        def has_prompt_in_name?(node)
+          case node.type
+          when :class, :module
+            name_node = node.children[0]
+            if name_node.type == :const
+              name_node.children[1].to_s.downcase.include?("prompt")
+            else
+              false
+            end
+          when :def, :defs
+            node.method_name.to_s.downcase.include?("prompt")
+          else
+            false
+          end
+        end
+        def extract_content(node)
+          case node.type
+          when :str
+            node.children[0]
+          when :dstr
+            # Handle heredoc content
+            node.children.filter_map do |child|
+              child.children[0] if child.type == :str
+            end.join
+          end
+        end
+        def starts_with_markdown_heading?(content)
+          # Remove leading whitespace and check if it starts with #
+          trimmed = content.gsub("\\n", "\n").strip
+          # Check if the first non-empty line starts with # followed by space and text
+          first_line = trimmed.lines.first&.strip
+          first_line&.match?(/^#\s+.+/)
+        end
+      end
+    end
+  end
+end

data/lib/rubocop/cop/prompt/max_tokens.rb ADDED Viewed

@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+require "rubocop"
+require "tiktoken_ruby"
+module RuboCop
+  module Cop
+    module Prompt
+      # Checks that documentation text in prompt-related code doesn't exceed the maximum token limit.
+      #
+      # This cop identifies code in classes, modules, or methods with "prompt" in their names
+      # and calculates the token count for any string literals or heredoc content using tiktoken_ruby.
+      # By default, it warns when the content exceeds 4000 tokens.
+      #
+      # @example
+      #   # bad (assuming very long content that exceeds token limit)
+      #   def generate_prompt
+      #     <<~PROMPT
+      #       # This is a very long prompt that contains thousands of tokens...
+      #       # [many lines of text]
+      #     PROMPT
+      #   end
+      #
+      #   # good
+      #   def generate_prompt
+      #     <<~PROMPT
+      #       # A concise prompt that stays within token limits
+      #       You are a helpful assistant.
+      #     PROMPT
+      #   end
+      class MaxTokens < RuboCop::Cop::Base
+        MSG = "Documentation text exceeds maximum token limit (%<actual>d > %<max>d tokens)"
+        # Default maximum token count
+        DEFAULT_MAX_TOKENS = 4000
+        def on_str(node)
+          return unless in_prompt_context?(node)
+          content = node.children[0]
+          return if content.nil? || content.strip.empty?
+          check_token_count(node, content)
+        end
+        def on_dstr(node)
+          return unless in_prompt_context?(node)
+          # Handle heredoc content
+          content = node.children.filter_map do |child|
+            child.children[0] if child.type == :str
+          end.join
+          return if content.strip.empty?
+          check_token_count(node, content)
+        end
+        private
+        def check_token_count(node, content)
+          token_count = calculate_tokens(content)
+          max_tokens = cop_config["MaxTokens"] || DEFAULT_MAX_TOKENS
+          return unless token_count > max_tokens
+          add_offense(
+            node,
+            message: format(MSG, actual: token_count, max: max_tokens)
+          )
+        end
+        def calculate_tokens(content)
+          # Use tiktoken_ruby to calculate token count
+          # Using cl100k_base encoding (used by GPT-3.5/GPT-4)
+          encoder = Tiktoken.get_encoding("cl100k_base")
+          encoder.encode(content).length
+        rescue StandardError => e
+          # If tiktoken_ruby fails for any reason, fall back to character count / 4
+          # This is a rough approximation: 1 token ≈ 4 characters for English text
+          warn "Failed to calculate tokens with tiktoken_ruby: #{e.message}. Using character approximation."
+          content.length / 4
+        end
+        def in_prompt_context?(node)
+          # Check if we're inside a class, module, or method that contains "prompt"
+          node.each_ancestor(:class, :module, :def, :defs) do |ancestor|
+            return true if has_prompt_in_name?(ancestor)
+          end
+          false
+        end
+        def has_prompt_in_name?(node)
+          case node.type
+          when :class, :module
+            name_node = node.children[0]
+            if name_node.type == :const
+              name_node.children[1].to_s.downcase.include?("prompt")
+            else
+              false
+            end
+          when :def, :defs
+            node.method_name.to_s.downcase.include?("prompt")
+          else
+            false
+          end
+        end
+      end
+    end
+  end
+end

data/lib/rubocop/cop/prompt/missing_stop.rb ADDED Viewed

@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+require "rubocop"
+module RuboCop
+  module Cop
+    module Prompt
+      # Checks for missing stop tokens or max_tokens in OpenAI::Client.chat calls.
+      #
+      # This cop identifies OpenAI::Client.chat method calls and ensures they include
+      # either stop: or max_tokens: parameters to prevent runaway generation and
+      # ensure predictable behavior.
+      #
+      # @example
+      #   # bad
+      #   OpenAI::Client.new.chat(
+      #     parameters: {
+      #       model: "gpt-4",
+      #       messages: [{ role: "user", content: "Hello" }]
+      #     }
+      #   )
+      #
+      #   # bad
+      #   client.chat(
+      #     parameters: {
+      #       model: "gpt-4",
+      #       messages: messages
+      #     }
+      #   )
+      #
+      #   # good
+      #   OpenAI::Client.new.chat(
+      #     parameters: {
+      #       model: "gpt-4",
+      #       messages: [{ role: "user", content: "Hello" }],
+      #       max_tokens: 100
+      #     }
+      #   )
+      #
+      #   # good
+      #   client.chat(
+      #     parameters: {
+      #       model: "gpt-4",
+      #       messages: messages,
+      #       stop: ["END", "\n"]
+      #     }
+      #   )
+      #
+      #   # good
+      #   client.chat(
+      #     parameters: {
+      #       model: "gpt-4",
+      #       messages: messages,
+      #       max_tokens: 1000,
+      #       stop: ["END"]
+      #     }
+      #   )
+      class MissingStop < RuboCop::Cop::Base
+        MSG = "OpenAI::Client.chat call should include 'stop:' or 'max_tokens:' parameter to prevent runaway generation"
+        def on_send(node)
+          return unless openai_chat_call?(node)
+          parameters_hash = extract_parameters_hash(node)
+          return unless parameters_hash
+          return if has_stop_or_max_tokens?(parameters_hash)
+          add_offense(node)
+        end
+        private
+        def openai_chat_call?(node)
+          return false unless node.method_name == :chat
+          # Check if this is called on OpenAI::Client instance
+          # This could be either:
+          # 1. OpenAI::Client.new.chat
+          # 2. client.chat (where client is an OpenAI::Client instance)
+          receiver = node.receiver
+          return false unless receiver
+          # Case 1: OpenAI::Client.new.chat
+          return openai_client_const?(receiver.receiver) if receiver.type == :send && receiver.method_name == :new
+          # Case 2: For now, we'll be conservative and only check explicit OpenAI::Client calls
+          # to avoid false positives. In the future, this could be enhanced with more
+          # sophisticated type analysis.
+          false
+        end
+        def openai_client_const?(node)
+          return false unless node&.type == :const
+          # Check for OpenAI::Client constant
+          # The AST structure is: s(:const, s(:const, nil, :OpenAI), :Client)
+          if node.children[0]&.type == :const
+            outer_const = node.children[0]
+            # Check if it's s(:const, nil, :OpenAI) and current is :Client
+            outer_const.children[0].nil? && outer_const.children[1] == :OpenAI && node.children[1] == :Client
+          else
+            false
+          end
+        end
+        def extract_parameters_hash(node)
+          # Look for parameters: { ... } in the method arguments
+          node.arguments.each do |arg|
+            next unless arg.type == :hash
+            arg.children.each do |pair|
+              next unless pair.type == :pair
+              key_node = pair.children[0]
+              value_node = pair.children[1]
+              next unless key_node.type == :sym && key_node.children[0] == :parameters
+              return value_node if value_node.type == :hash
+              # If parameters is not a hash (e.g., a variable), we can't analyze it
+              return nil
+            end
+          end
+          nil
+        end
+        def has_stop_or_max_tokens?(hash_node)
+          return false unless hash_node.type == :hash
+          hash_node.children.any? do |pair|
+            next false unless pair.type == :pair
+            key_node = pair.children[0]
+            next false unless key_node.type == :sym
+            key_name = key_node.children[0]
+            %i[stop max_tokens].include?(key_name)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/rubocop/cop/prompt/system_injection.rb ADDED Viewed

@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+require "rubocop"
+module RuboCop
+  module Cop
+    module Prompt
+      # Checks for dynamic variable interpolation in SYSTEM heredocs.
+      #
+      # This cop identifies code in classes, modules, or methods with "prompt" in their names
+      # and ensures that SYSTEM heredocs do not contain dynamic variable interpolations like #{user_msg}.
+      # Dynamic interpolation in system prompts can lead to prompt injection vulnerabilities.
+      #
+      # @example
+      #   # bad
+      #   <<~SYSTEM
+      #     You are an AI assistant. The user said: #{user_msg}
+      #   SYSTEM
+      #
+      #   # bad
+      #   <<~SYSTEM
+      #     Process this request: #{params[:input]}
+      #   SYSTEM
+      #
+      #   # good
+      #   <<~SYSTEM
+      #     You are an AI assistant.
+      #   SYSTEM
+      #
+      #   # good (using separate user message)
+      #   system_prompt = <<~SYSTEM
+      #     You are an AI assistant.
+      #   SYSTEM
+      #   user_message = user_msg
+      class SystemInjection < RuboCop::Cop::Base
+        MSG = "Avoid dynamic interpolation in SYSTEM heredocs to prevent prompt injection vulnerabilities"
+        def on_dstr(node)
+          return unless in_prompt_context?(node)
+          return unless system_heredoc?(node)
+          return unless has_interpolation?(node)
+          add_offense(node)
+        end
+        private
+        def in_prompt_context?(node)
+          # Check if we're inside a class, module, or method that contains "prompt"
+          node.each_ancestor(:class, :module, :def, :defs) do |ancestor|
+            return true if has_prompt_in_name?(ancestor)
+          end
+          false
+        end
+        def has_prompt_in_name?(node)
+          case node.type
+          when :class, :module
+            name_node = node.children[0]
+            if name_node.type == :const
+              name_node.children[1].to_s.downcase.include?("prompt")
+            else
+              false
+            end
+          when :def, :defs
+            node.method_name.to_s.downcase.include?("prompt")
+          else
+            false
+          end
+        end
+        def system_heredoc?(node)
+          return false unless node.type == :dstr
+          # Check if this heredoc has the SYSTEM delimiter
+          # Get the source of the heredoc opening
+          source = node.source_range.source_buffer.source
+          line_start = node.source_range.begin_pos
+          # Find the start of the line containing the heredoc
+          line_begin = source.rindex("\n", line_start - 1) || 0
+          line_begin += 1 if line_begin > 0
+          # Get the line content
+          line_end = source.index("\n", line_start) || source.length
+          line_content = source[line_begin...line_end]
+          # Check if line contains SYSTEM heredoc marker
+          line_content.include?("<<~SYSTEM") || line_content.include?("<<SYSTEM")
+        end
+        def has_interpolation?(node)
+          # Check if any child nodes are interpolations (begin nodes)
+          node.children.any? { |child| child.type == :begin }
+        end
+      end
+    end
+  end
+end