rails_mcp_code_search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +176 -0
- data/Rakefile +10 -0
- data/exe/rails-mcp-code-search +5 -0
- data/lib/rails_mcp_code_search/background_worker.rb +150 -0
- data/lib/rails_mcp_code_search/chunk.rb +7 -0
- data/lib/rails_mcp_code_search/database.rb +96 -0
- data/lib/rails_mcp_code_search/embeddings/adapter.rb +13 -0
- data/lib/rails_mcp_code_search/embeddings/local_adapter.rb +29 -0
- data/lib/rails_mcp_code_search/embeddings/openai_adapter.rb +59 -0
- data/lib/rails_mcp_code_search/erb_parser.rb +89 -0
- data/lib/rails_mcp_code_search/indexer.rb +172 -0
- data/lib/rails_mcp_code_search/ruby_parser.rb +104 -0
- data/lib/rails_mcp_code_search/runtime.rb +63 -0
- data/lib/rails_mcp_code_search/server.rb +25 -0
- data/lib/rails_mcp_code_search/sliding_window_parser.rb +39 -0
- data/lib/rails_mcp_code_search/tools/base_tool.rb +26 -0
- data/lib/rails_mcp_code_search/tools/reindex_tool.rb +51 -0
- data/lib/rails_mcp_code_search/tools/search_tool.rb +128 -0
- data/lib/rails_mcp_code_search/tools/status_tool.rb +64 -0
- data/lib/rails_mcp_code_search/version.rb +3 -0
- data/lib/rails_mcp_code_search.rb +26 -0
- metadata +186 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
require "open3"
require "digest"

module RailsMcpCodeSearch
  # Discovers project files via git, splits them into chunks with the
  # language-appropriate parser, persists the chunks, and sends them to the
  # embedding adapter in batches.
  class Indexer
    INCLUDE_PATTERNS = %w[**/*.rb **/*.erb **/*.js **/*.ts **/*.yml **/*.yaml **/*.md].freeze
    EXCLUDE_PATTERNS = %w[vendor/ node_modules/ tmp/ log/ .git/].freeze
    BATCH_SIZE = 50

    NotAGitRepo = Class.new(StandardError)

    # Per-file errors ({ file:, error: }) collected during the last run.
    attr_reader :errors

    # @param embedding_adapter [#embed, #dimensions] turns text batches into vectors
    # @param project_path [String] repository root to index
    # @param logger [Logger, nil] optional logger for warnings
    def initialize(embedding_adapter:, project_path: Dir.pwd, logger: nil)
      @embedding_adapter = embedding_adapter
      @project_path = File.realpath(project_path)
      @logger = logger
      @errors = []
    end

    # Index every discoverable file; no-op when nothing matches.
    def index_all
      @errors = []
      files = discover_files
      return if files.empty?

      process_files(files)
      update_metadata
    end

    # Index only the given project-relative paths. Paths that do not resolve
    # inside the project root are silently dropped.
    def index_files(file_paths)
      @errors = []
      safe_paths = file_paths.select { valid_path?(_1) }
      process_files(safe_paths)
    end

    # Files changed relative to HEAD. Memoized for 3 seconds so back-to-back
    # searches don't shell out to git on every call.
    def changed_files
      @_changed_files_cache ||= {}
      now = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      if @_changed_files_cache[:at] && (now - @_changed_files_cache[:at]) < 3
        return @_changed_files_cache[:files]
      end

      stdout, _, status = Open3.capture3("git", "diff", "--name-only", "HEAD", chdir: @project_path)
      files = status.success? ? stdout.lines.map(&:strip).select { valid_path?(_1) } : []

      @_changed_files_cache = { files:, at: now }
      files
    end

    # Tracked plus untracked-but-not-ignored files matching the include
    # patterns and resolving inside the project root.
    def discover_files
      tracked = git_ls_files
      untracked = git_ls_files("--others", "--exclude-standard")
      all_files = (tracked + untracked).uniq

      all_files.select { include_file?(_1) && valid_path?(_1) }
    end

    private

    def git_ls_files(*args)
      stdout, _stderr, status = Open3.capture3("git", "ls-files", *args, chdir: @project_path)
      raise NotAGitRepo, "Not a git repository: #{@project_path}" unless status.success?
      stdout.lines.map(&:strip)
    end

    def include_file?(path)
      return false if EXCLUDE_PATTERNS.any? { path.start_with?(_1) || path.include?("/#{_1}") }
      # NOTE(review): with FNM_PATHNAME, "**/*.rb" does not match a root-level
      # file like "app.rb" — confirm skipping top-level files is intended.
      INCLUDE_PATTERNS.any? { File.fnmatch?(_1, path, File::FNM_PATHNAME) }
    end

    # True when +path+ exists and its realpath stays inside the project root
    # (guards against symlinks escaping the repository).
    def valid_path?(path)
      full_path = File.join(@project_path, path)
      return false unless File.exist?(full_path)
      real = File.realpath(full_path)
      # Compare against the root plus a separator so a sibling directory that
      # merely shares the prefix (e.g. "/repo-evil" vs "/repo") is rejected.
      real == @project_path || real.start_with?(@project_path + File::SEPARATOR)
    rescue Errno::ENOENT
      false
    end

    # Chunks each file (skipping unchanged ones by checksum), persists the
    # chunks, and embeds them in batches of BATCH_SIZE.
    def process_files(files)
      # Remove chunks for files that no longer exist in the index input.
      existing_paths = Chunk.distinct.pluck(:file_path)
      deleted = existing_paths - files
      Chunk.where(file_path: deleted).delete_all if deleted.any?

      chunks_to_embed = []

      files.each do |file_path|
        full_path = File.join(@project_path, file_path)
        source = File.read(full_path, encoding: "utf-8")

        unless source.valid_encoding?
          @errors << { file: file_path, error: "Invalid UTF-8 encoding" }
          next
        end

        file_checksum = Digest::SHA256.hexdigest(source)

        # Skip files whose content hasn't changed since the last run.
        existing = Chunk.where(file_path:).first
        next if existing && existing.checksum == file_checksum

        # Re-chunk from scratch: drop the file's stale chunks first.
        Chunk.where(file_path:).delete_all

        parsed = parse_file(file_path, source)
        next if parsed.empty?

        parsed.each do |result|
          chunk = Chunk.create!(
            file_path:,
            line_start: result.line_start,
            line_end: result.line_end,
            chunk_type: result.chunk_type,
            qualified_name: result.qualified_name,
            content: result.content,
            checksum: file_checksum
          )
          chunks_to_embed << chunk
        end

        # Embed in bounded batches so memory stays flat on large repos.
        if chunks_to_embed.size >= BATCH_SIZE
          embed_batch(chunks_to_embed)
          chunks_to_embed = []
        end
      rescue => e
        # One unreadable or unparsable file must not abort the whole run.
        @errors << { file: file_path, error: e.message }
        log(:warn, "Error indexing #{file_path}: #{e.message}")
      end

      embed_batch(chunks_to_embed) if chunks_to_embed.any?
    end

    # Routes a file to the parser matching its extension.
    def parse_file(file_path, source)
      if file_path.end_with?(".rb")
        RubyParser.parse(source, file_path:)
      elsif file_path.end_with?(".erb")
        ErbParser.parse(source, file_path:)
      else
        SlidingWindowParser.parse(source, file_path:)
      end
    end

    # Embeds a batch of chunks and stores each vector on its chunk.
    # Failures are recorded in +errors+ rather than raised.
    def embed_batch(chunks)
      return if chunks.empty?
      texts = chunks.map(&:content)
      vectors = @embedding_adapter.embed(texts)

      chunks.each_with_index do |chunk, i|
        chunk.update!(embedding: vectors[i])
      end

      # Encourage eager release of the large text/vector arrays.
      GC.start
    rescue => e
      @errors << { file: "batch_embed", error: e.message }
      log(:warn, "Embedding batch failed: #{e.message}")
    end

    # Records when and with which adapter the index was last rebuilt.
    def update_metadata
      Database::Metadata.set "last_reindex_at", Time.now.iso8601
      Database::Metadata.set "embedding_provider", @embedding_adapter.class.name.split("::").last
      Database::Metadata.set "embedding_dimensions", @embedding_adapter.dimensions
    end

    def log(level, message)
      @logger&.send(level, message)
    end
  end
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
require "prism"

module RailsMcpCodeSearch
  # Chunks Ruby source into class/module/method units using the Prism AST.
  # Falls back to SlidingWindowParser when the file does not parse.
  class RubyParser
    ParseError = Class.new(StandardError)
    # One extracted chunk: source text, its 1-based inclusive line range,
    # a kind tag ("class"/"module"/"method"), and a fully-qualified name.
    Result = Data.define(:content, :line_start, :line_end, :chunk_type, :qualified_name)

    def self.parse(source, file_path: nil)
      new.parse(source, file_path:)
    end

    # Returns an Array<Result>; never raises for unparsable input — it falls
    # back to sliding-window chunking instead.
    def parse(source, file_path: nil)
      result = Prism.parse(source)
      raise ParseError, result.errors.map(&:message).join(", ") unless result.success?

      visitor = Visitor.new(source)
      visitor.visit(result.value)
      visitor.chunks
    rescue ParseError
      SlidingWindowParser.parse(source, file_path:)
    end

    # AST walker that records a chunk for every class, module and method.
    class Visitor < Prism::Visitor
      attr_reader :chunks

      def initialize(source)
        super()
        @source = source
        @lines = source.lines
        @scope_stack = []
        @in_singleton = false
        @chunks = []
      end

      def visit_class_node(node)
        visit_container(node, "class")
      end

      def visit_module_node(node)
        visit_container(node, "module")
      end

      def visit_def_node(node)
        name = node.name.to_s
        # A receiver (e.g. `def self.foo`) means a singleton/class method, so
        # the qualified name uses "." instead of the instance "#" separator.
        qualified = build_qualified_name(name, instance_method: node.receiver.nil?)
        add_chunk(node, "method", qualified)
      end

      def visit_singleton_class_node(node)
        # Methods defined inside `class << self` are class methods.
        @in_singleton = true
        super
        @in_singleton = false
      end

      private

      # Records the container itself as a chunk, then walks its body with the
      # container name pushed onto the scope stack.
      def visit_container(node, type)
        name = constant_name(node.constant_path)
        @scope_stack.push(name)

        qualified = @scope_stack.join("::")
        line_start = node.location.start_line
        line_end = node.location.end_line
        content = @lines[(line_start - 1)..(line_end - 1)].join
        @chunks << Result.new(content:, line_start:, line_end:, chunk_type: type, qualified_name: qualified)

        visit_child_nodes(node)

        @scope_stack.pop
      end

      def add_chunk(node, type, qualified_name)
        line_start = node.location.start_line
        line_end = node.location.end_line
        content = @lines[(line_start - 1)..(line_end - 1)].join
        @chunks << Result.new(content:, line_start:, line_end:, chunk_type: type, qualified_name:)
      end

      # "Foo::Bar#baz" for instance methods, "Foo::Bar.baz" for class methods.
      def build_qualified_name(method_name, instance_method: true)
        prefix = @scope_stack.join("::")
        separator = (@in_singleton || !instance_method) ? "." : "#"
        prefix.empty? ? method_name : "#{prefix}#{separator}#{method_name}"
      end

      # Flattens a constant path node (A, A::B, A::B::C) into a string.
      def constant_name(node)
        case node
        when Prism::ConstantReadNode
          node.name.to_s
        when Prism::ConstantPathNode
          parts = []
          current = node
          while current.is_a?(Prism::ConstantPathNode)
            parts.unshift(current.name.to_s)
            current = current.parent
          end
          parts.unshift(current.name.to_s) if current.is_a?(Prism::ConstantReadNode)
          parts.join("::")
        else
          node.to_s
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
require "logger"

module RailsMcpCodeSearch
  # Owns the process-level wiring: database, embedding adapter, indexer and
  # background worker. Built once per server process via Runtime.boot.
  class Runtime
    attr_reader :db_path, :embedding_adapter, :indexer, :worker, :logger, :project_path

    # Constructs and boots in one call.
    def self.boot(project_path: Dir.pwd, db_path: nil)
      new(project_path:, db_path:).tap(&:boot)
    end

    def initialize(project_path: Dir.pwd, db_path: nil)
      @project_path = project_path
      @db_path = db_path
      @shut_down = false
      @logger = Logger.new($stderr, level: log_level)
      @logger.formatter = proc { |severity, _time, _progname, msg| "[rails-mcp-code-search] #{severity}: #{msg}\n" }
    end

    # Sets up the database, adapter, indexer and worker, installs shutdown
    # hooks, and starts background indexing. Call before serving requests.
    def boot
      @db_path = Database.setup(project_path: @project_path, db_path: @db_path)
      @embedding_adapter = build_adapter
      check_dimension_mismatch
      @indexer = Indexer.new(embedding_adapter: @embedding_adapter, project_path: @project_path, logger: @logger)
      @worker = BackgroundWorker.new(indexer: @indexer, logger: @logger)
      @worker.start
      setup_shutdown_hooks
      @logger.info "Booted for #{@project_path}"
    end

    # Idempotent: the at_exit hook and the INT/TERM traps can all fire for a
    # single process exit, but the worker is only stopped (and the message
    # only logged) once.
    def shutdown
      return if @shut_down
      @shut_down = true
      @worker&.stop
      @logger.info "Shut down"
    end

    private

    # Adapter selection is driven by RAILS_MCP_CODE_SEARCH_PROVIDER;
    # anything other than "openai" falls back to the local adapter.
    def build_adapter
      case ENV.fetch("RAILS_MCP_CODE_SEARCH_PROVIDER", "local")
      when "openai" then Embeddings::OpenaiAdapter.new
      else Embeddings::LocalAdapter.new
      end
    end

    # Stored vectors produced with a different dimensionality are unusable
    # with the active adapter: wipe them and record the new size.
    def check_dimension_mismatch
      stored = Database::Metadata.get("embedding_dimensions")&.to_i
      return unless stored
      return if stored == @embedding_adapter.dimensions

      @logger.warn "Dimension mismatch (stored: #{stored}, active: #{@embedding_adapter.dimensions}). Triggering full reindex."
      Chunk.delete_all
      Database::Metadata.set "embedding_dimensions", @embedding_adapter.dimensions
    end

    # NOTE(review): the trap handlers call #shutdown, which logs through
    # Logger (monitor-backed) — confirm this is safe in trap context on the
    # oldest supported Ruby.
    def setup_shutdown_hooks
      at_exit { shutdown }
      trap("INT") { shutdown; exit }
      trap("TERM") { shutdown; exit }
    end

    def log_level
      ENV.fetch("RAILS_MCP_CODE_SEARCH_LOG_LEVEL", "info")
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require "mcp"

module RailsMcpCodeSearch
  # Entry point: boots the runtime and serves the gem's MCP tools over stdio.
  class Server
    TOOLS = [
      Tools::ReindexTool,
      Tools::SearchTool,
      Tools::StatusTool
    ].freeze

    # Boots a Runtime for +project_path+ and blocks, serving stdio requests.
    # The runtime is handed to every tool call via the server context.
    def self.start(project_path: Dir.pwd, db_path: nil)
      runtime = Runtime.boot(project_path:, db_path:)

      mcp_server = ::MCP::Server.new(
        name: "rails-mcp-code-search",
        version: VERSION,
        tools: TOOLS,
        server_context: { runtime: }
      )

      ::MCP::Server::Transports::StdioTransport.new(mcp_server).open
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
module RailsMcpCodeSearch
  # Fallback chunker: fixed-size, overlapping windows of lines. Used for
  # non-Ruby/ERB files and for Ruby files that fail to parse.
  class SlidingWindowParser
    WINDOW_SIZE = 50
    OVERLAP = 10
    MAX_CHUNKS = 200 # hard cap so pathological files can't flood the index

    Result = RubyParser::Result

    def self.parse(source, file_path: nil)
      new.parse(source, file_path:)
    end

    # Returns up to MAX_CHUNKS "window" Results covering +source+.
    # Consecutive windows overlap by OVERLAP lines so context isn't cut at
    # window boundaries.
    def parse(source, file_path: nil)
      lines = source.lines
      return [] if lines.empty?

      chunks = []
      step = WINDOW_SIZE - OVERLAP
      offset = 0

      while offset < lines.size && chunks.size < MAX_CHUNKS
        window_end = [ offset + WINDOW_SIZE, lines.size ].min
        content = lines[offset...window_end].join

        chunks << Result.new(
          content:,
          line_start: offset + 1,
          line_end: window_end,
          chunk_type: "window",
          qualified_name: nil
        )

        # Stop once a window reaches EOF — stepping again would emit a
        # smaller window fully contained in this one (a redundant chunk).
        break if window_end == lines.size

        offset += step
      end

      chunks
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require "mcp"
require "json"

module RailsMcpCodeSearch
  module Tools
    # Shared response/context helpers for the gem's MCP tools. Subclasses use
    # these private class-level helpers from their `self.call` implementations.
    class BaseTool < ::MCP::Tool
      class << self
        private

        # Fetches the booted Runtime that Server placed in the MCP context.
        def runtime_for(server_context:)
          server_context[:runtime]
        end

        # Wraps +data+ (a String, or anything JSON-serializable) as a text
        # tool response.
        def text_response(data)
          payload = data.is_a?(String) ? data : JSON.generate(data)
          ::MCP::Tool::Response.new([ { type: "text", text: payload } ])
        end

        # Builds an error tool response; nil-valued fields are omitted from
        # the JSON body (note: `recoverable: false` is kept — compact only
        # drops nils).
        def error_response(error:, message:, recoverable: false, suggested_action: nil)
          body = { error:, message:, recoverable:, suggested_action: }.compact
          ::MCP::Tool::Response.new([ { type: "text", text: JSON.generate(body) } ], error: true)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
module RailsMcpCodeSearch
  module Tools
    # MCP tool that enqueues a full or incremental reindex on the background
    # worker and returns immediately; progress is visible via the status tool.
    class ReindexTool < BaseTool
      tool_name "reindex"
      description "Trigger codebase reindex. Use full=true to rebuild the entire index. " \
                  "Returns immediately — use the status tool to check progress."

      input_schema(
        properties: {
          full: { type: "boolean", description: "Full reindex (default: incremental)" }
        }
      )

      annotations(
        title: "Reindex Code",
        read_only_hint: false,
        destructive_hint: false,
        idempotent_hint: true,
        open_world_hint: false
      )

      # @param full [Boolean, nil] anything but literal true means incremental
      def self.call(server_context:, full: nil)
        runtime = runtime_for(server_context:)
        full = full == true # coerce — clients may send nil or other types

        if full
          runtime.worker.enqueue(:full_index)
          runtime.worker.increment_reindex_count
          # Best-effort estimate: a discovery failure (e.g. not a git repo)
          # must not abort the already-enqueued reindex.
          estimated =
            begin
              runtime.indexer.discover_files.size
            rescue StandardError
              0
            end

          text_response({
            status: "reindex_started",
            mode: "full",
            estimated_files: estimated
          })
        else
          changed = runtime.indexer.changed_files
          if changed.empty?
            text_response({ status: "no_changes", mode: "incremental", changed_files: 0 })
          else
            runtime.worker.enqueue(:index_files, payload: changed)
            runtime.worker.increment_reindex_count
            text_response({ status: "reindex_started", mode: "incremental", changed_files: changed.size })
          end
        end
      rescue => e
        error_response(error: "reindex_error", message: e.message, recoverable: true)
      end
    end
  end
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
module RailsMcpCodeSearch
  module Tools
    # MCP tool: semantic (embedding) search over indexed code chunks.
    class SearchTool < BaseTool
      tool_name "search"
      description "Search the codebase using semantic similarity. Use this when you need to find " \
                  "code by concept or behavior (e.g., 'authentication logic', 'payment processing') " \
                  "rather than by exact identifier. For exact string matches, prefer Grep. " \
                  "Returns code chunks ranked by cosine similarity. " \
                  "Scores above 0.7 are typically strong matches, 0.5-0.7 are partial matches."

      input_schema(
        properties: {
          query: { type: "string", description: "Search query (natural language or code)" },
          limit: { type: "integer", description: "Max results (default 10)" },
          file_pattern: { type: "string", description: "Glob pattern to filter results by file path (e.g. 'app/models/**/*.rb'). Applied after similarity search." }
        },
        required: %w[query]
      )

      annotations(
        title: "Search Code",
        read_only_hint: true,
        destructive_hint: false,
        idempotent_hint: true,
        open_world_hint: false
      )

      # Runs a KNN search over chunk embeddings, optionally filtered by a
      # file glob, with overlapping same-file results deduplicated.
      def self.call(query:, server_context:, limit: nil, file_pattern: nil)
        runtime = runtime_for(server_context:)
        limit = (limit || 10).clamp(1, 50)

        if Chunk.count == 0
          worker_state = runtime.worker.state
          if worker_state == :indexing
            return error_response(error: "indexing_in_progress", message: "Index is still building. Try again in a moment.", recoverable: true, suggested_action: "status")
          else
            return error_response(error: "index_empty", message: "No files indexed yet. Call reindex first.", recoverable: true, suggested_action: "reindex")
          end
        end

        # Smart reindex: enqueue changed files and wait briefly.
        trigger_smart_reindex(runtime)

        # Generate the query embedding.
        query_vector = runtime.embedding_adapter.embed([ query ]).first

        # KNN search — over-fetch when filtering so the glob doesn't starve
        # the result list.
        fetch_limit = file_pattern ? limit * 5 : limit
        raw_results = Chunk.nearest_neighbors(:embedding, query_vector, distance: "cosine").first(fetch_limit)

        # Filter by file pattern (post-hoc, so similarity ordering is kept).
        filtered_out = 0
        if file_pattern
          before_count = raw_results.size
          raw_results = raw_results.select { File.fnmatch?(file_pattern, _1.file_path, File::FNM_PATHNAME) }
          filtered_out = before_count - raw_results.size
        end

        # Deduplicate overlapping results from the same file.
        results = dedup_overlapping(raw_results)
        results = results.first(limit)

        # Track usage metrics asynchronously.
        runtime.worker.enqueue_hit_counts(results.map(&:id))
        runtime.worker.increment_search_count

        stale = runtime.worker.state == :indexing

        text_response({
          results: results.map { format_result(_1) },
          metadata: {
            query:,
            limit:,
            count: results.size,
            has_more: raw_results.size > limit,
            index_state: runtime.worker.state.to_s,
            index_completeness: runtime.worker.state == :idle ? 1.0 : runtime.worker.progress,
            results_may_be_stale: stale,
            total_indexed_chunks: Chunk.count,
            filtered_out_count: filtered_out
          }
        })
      rescue => e
        error_response(error: "search_error", message: e.message, recoverable: true)
      end

      class << self
        private

        # Enqueues any git-changed files and gives the worker a short window
        # to pick them up before the search runs.
        def trigger_smart_reindex(runtime)
          changed = runtime.indexer.changed_files
          return if changed.empty?

          runtime.worker.enqueue(:index_files, payload: changed)
          runtime.worker.wait_for_reindex(timeout: 0.2)
        end

        # Drops results whose line range overlaps an earlier (higher-ranked)
        # kept result from the same file. Input is ordered by similarity, so
        # the best-scoring chunk of each overlapping group survives.
        #
        # (Fixes the previous implementation, which stored the first chunk
        # seen for a file twice in its bookkeeping list.)
        def dedup_overlapping(results)
          kept_per_file = Hash.new { |h, k| h[k] = [] }
          results.select do |result|
            kept = kept_per_file[result.file_path]
            overlaps = kept.any? do |prev|
              result.line_start <= prev.line_end && result.line_end >= prev.line_start
            end
            kept << result unless overlaps
            !overlaps
          end
        end

        # Serializes a chunk for the tool response; neighbor_distance is the
        # cosine distance, so similarity = 1 - distance.
        def format_result(chunk)
          {
            file_path: chunk.file_path,
            line_start: chunk.line_start,
            line_end: chunk.line_end,
            chunk_type: chunk.chunk_type,
            qualified_name: chunk.qualified_name,
            content: chunk.content,
            similarity: (1.0 - chunk.neighbor_distance).round(4)
          }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
module RailsMcpCodeSearch
  module Tools
    # MCP tool reporting index health: state, counts, provider info, recent
    # errors and usage stats.
    class StatusTool < BaseTool
      tool_name "status"
      description "Show index health and readiness. Use to check if indexing is complete " \
                  "before searching, or to diagnose issues."

      input_schema(properties: {})

      annotations(
        title: "Index Status",
        read_only_hint: true,
        destructive_hint: false,
        idempotent_hint: true,
        open_world_hint: false
      )

      def self.call(server_context:)
        runtime = runtime_for(server_context:)
        worker = runtime.worker

        chunk_count = Chunk.count
        file_count = Chunk.distinct.pluck(:file_path).size

        # Order matters: an error or in-progress index takes precedence over
        # the empty/ready distinction.
        state = if worker.state == :error
          "error"
        elsif worker.state == :indexing
          "indexing"
        elsif chunk_count == 0
          "empty"
        else
          "ready"
        end

        # Best-effort: a missing/nil db path reports 0 rather than failing
        # the whole status call.
        db_size =
          begin
            File.size(runtime.db_path)
          rescue StandardError
            0
          end

        top_chunks = Chunk.where("hit_count > 0").order(hit_count: :desc).limit(5).map do |c|
          { file_path: c.file_path, qualified_name: c.qualified_name, hit_count: c.hit_count }
        end

        text_response({
          state:,
          chunk_count:,
          file_count:,
          db_size_bytes: db_size,
          index_completeness: worker.state == :idle ? 1.0 : worker.progress,
          embedding_provider: runtime.embedding_adapter.class.name.split("::").last.sub("Adapter", "").downcase,
          embedding_dimensions: runtime.embedding_adapter.dimensions,
          project_path: runtime.project_path,
          indexing_errors: worker.errors.first(10),
          stats: {
            total_searches: Database::Metadata.get("total_searches").to_i,
            total_reindexes: Database::Metadata.get("total_reindexes").to_i,
            last_search_at: Database::Metadata.get("last_search_at"),
            last_reindex_at: Database::Metadata.get("last_reindex_at")
          },
          top_chunks_by_hits: top_chunks
        })
      rescue => e
        error_response(error: "status_error", message: e.message, recoverable: true)
      end
    end
  end
end
|