RubyGems - llms-txt-ruby - Versions diffs - 0.0.0 → 0.1.0 - Mend

llms-txt-ruby 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/.github/workflows/ci.yml +71 -0
data/.github/workflows/push.yml +35 -0
data/.gitignore +10 -1
data/.rubocop.yml +27 -0
data/.ruby-version +1 -0
data/Gemfile +10 -0
data/Gemfile.lock +88 -0
data/README.md +279 -30
data/Rakefile +10 -0
data/bin/llms-txt +242 -0
data/lib/llms_txt/bulk_transformer.rb +137 -0
data/lib/llms_txt/config.rb +113 -0
data/lib/llms_txt/errors.rb +31 -0
data/lib/llms_txt/generator.rb +234 -0
data/lib/llms_txt/markdown_transformer.rb +90 -0
data/lib/llms_txt/parser.rb +223 -0
data/lib/llms_txt/validator.rb +222 -0
data/lib/llms_txt/version.rb +6 -0
data/lib/llms_txt.rb +130 -0
data/llms-txt-ruby.gemspec +18 -9
data/llms-txt.yml.example +26 -0
data/mise.toml +2 -0
data/renovate.json +30 -0
metadata +115 -10
data/lib/llms-txt/version.rb +0 -3

data/lib/llms_txt/generator.rb ADDED Viewed

@@ -0,0 +1,234 @@
+# frozen_string_literal: true
+module LlmsTxt
+  # Simple generator that creates llms.txt from existing markdown documentation
+  #
+  # Takes a documentation directory or file and generates a properly formatted llms.txt file by
+  # analyzing markdown files, extracting titles and descriptions, and organizing them by priority.
+  #
+  # @example Generate from docs directory
+  #   generator = LlmsTxt::Generator.new('./docs', base_url: 'https://myproject.io')
+  #   content = generator.generate
+  #
+  # @api public
class Generator
  # Maximum number of characters kept from a file's first paragraph
  DESCRIPTION_LIMIT = 200

  # Filename fragments mapped to the priority they grant; checked in insertion order,
  # so the first fragment contained in the basename wins
  PRIORITY_KEYWORDS = {
    'getting' => 2,
    'guide' => 3,
    'tutorial' => 4,
    'api' => 5,
    'reference' => 6
  }.freeze

  # Priority given to files whose name matches none of the known fragments
  DEFAULT_PRIORITY = 7

  # @return [String] path to documentation directory or file
  attr_reader :docs_path

  # @return [Hash] generation options
  attr_reader :options

  # Initialize a new generator
  #
  # @param docs_path [String] path to documentation directory or file
  # @param options [Hash] generation options
  # @option options [String] :base_url base URL for expanding relative links
  # @option options [String] :title project title (overrides auto-detection)
  # @option options [String] :description project description (overrides auto-detection)
  # @option options [String] :output output file path for saving
  # @option options [Boolean] :verbose enable verbose output
  def initialize(docs_path, options = {})
    @docs_path = docs_path
    @options = options
  end

  # Generate llms.txt content from documentation
  #
  # Analyzes the documentation files, builds the llms.txt text and, when the :output
  # option is set, also writes that text to the given path.
  #
  # @return [String] generated llms.txt content
  def generate
    content = build_llms_txt(find_documentation_files)
    output_path = options[:output]
    File.write(output_path, content) if output_path
    content
  end

  private

  # Locates and analyzes documentation files from docs_path
  #
  # Handles both single file and directory paths; a path that does not exist yields
  # an empty list.
  #
  # @return [Array<Hash>] array of analyzed file metadata
  def find_documentation_files
    return [] unless File.exist?(docs_path)
    return [analyze_file(docs_path)] if File.file?(docs_path)

    find_markdown_files_in_directory
  end

  # Recursively finds and analyzes markdown files in directory
  #
  # Skips files whose name starts with a dot. Results are ordered by priority and then
  # by relative path, so the output does not depend on filesystem traversal order.
  #
  # @return [Array<Hash>] sorted array of analyzed file metadata
  def find_markdown_files_in_directory
    files = []
    Find.find(docs_path) do |path|
      next unless File.file?(path)
      next unless File.extname(path).casecmp?('.md')
      next if File.basename(path).start_with?('.')

      files << analyze_file(path)
    end
    files.sort_by { |file| [file[:priority], file[:path]] }
  end

  # Extracts metadata from a documentation file
  #
  # @param file_path [String] path to file to analyze
  # @return [Hash] file metadata with :path, :title, :description, :priority
  def analyze_file(file_path)
    content = File.read(file_path)
    {
      path: relative_doc_path(file_path),
      title: extract_title(content, file_path),
      description: extract_description(content),
      priority: calculate_priority(file_path)
    }
  end

  # Computes the path used in generated links
  #
  # When docs_path is itself a file only the basename is used; otherwise the path is
  # expressed relative to docs_path.
  #
  # @param file_path [String] path to the analyzed file
  # @return [String] path to place in the generated link
  def relative_doc_path(file_path)
    return File.basename(file_path) if File.file?(docs_path)

    Pathname.new(file_path).relative_path_from(Pathname.new(docs_path)).to_s
  end

  # Extracts title from file content or generates from filename
  #
  # Prefers the first H1 header; falls back to the filename with its extension removed
  # (whatever its letter case), separators turned into spaces and words capitalized.
  #
  # @param content [String] file content
  # @param file_path [String] path to file
  # @return [String] extracted or generated title
  def extract_title(content, file_path)
    heading = content[/^#\s+(.+)/, 1]
    return heading.strip if heading

    File.basename(file_path, '.*').tr('_-', ' ').split.map(&:capitalize).join(' ')
  end

  # Extracts description from file content
  #
  # Takes the first paragraph after any leading header/blank lines, folds it onto a
  # single line (so it cannot break the one-line blockquote and list-item syntax) and
  # truncates it to DESCRIPTION_LIMIT characters.
  #
  # @param content [String] file content
  # @return [String] extracted description (empty when there is no paragraph)
  def extract_description(content)
    body = content.lines.drop_while { |line| line.start_with?('#') || line.strip.empty? }
    paragraph = body.take_while { |line| !line.strip.empty? }
    paragraph.map(&:strip).join(' ').slice(0, DESCRIPTION_LIMIT).rstrip
  end

  # Assigns priority to file based on filename patterns
  #
  # README gets highest priority, followed by the fragments in PRIORITY_KEYWORDS
  #
  # @param file_path [String] path to file
  # @return [Integer] priority value (1-7, lower is higher priority)
  def calculate_priority(file_path)
    basename = File.basename(file_path).downcase
    return 1 if basename.start_with?('readme')

    _keyword, priority = PRIORITY_KEYWORDS.find { |keyword, _| basename.include?(keyword) }
    priority || DEFAULT_PRIORITY
  end

  # Constructs llms.txt content from analyzed documentation files
  #
  # Emits the H1 title, the description blockquote (only when a non-blank description
  # is available) and a "Documentation" section listing every analyzed file.
  #
  # @param docs [Array<Hash>] analyzed file metadata
  # @return [String] formatted llms.txt content
  def build_llms_txt(docs)
    title = options[:title] || detect_project_title(docs)
    description = options[:description] || detect_project_description(docs)

    lines = ["# #{title}", '']
    lines.push("> #{description}", '') unless description.to_s.strip.empty?
    lines.push('## Documentation', '', *docs.map { |doc| format_link(doc) }) if docs.any?
    "#{lines.join("\n")}\n"
  end

  # Formats one documentation entry as a markdown list item
  #
  # @param doc [Hash] analyzed file metadata
  # @return [String] "- [title](url)" with ": description" appended when present
  def format_link(doc)
    link = "- [#{doc[:title]}](#{build_url(doc[:path])})"
    summary = doc[:description]
    summary.nil? || summary.empty? ? link : "#{link}: #{summary}"
  end

  # Finds the first analyzed file whose path mentions "readme"
  #
  # @param docs [Array<Hash>] analyzed file metadata
  # @return [Hash, nil] README metadata or nil when none was analyzed
  def find_readme(docs)
    docs.find { |doc| doc[:path].downcase.include?('readme') }
  end

  # Attempts to detect project title from README or directory name
  #
  # @param docs [Array<Hash>] analyzed file metadata
  # @return [String] README title, or the current working directory's name
  def detect_project_title(docs)
    readme = find_readme(docs)
    return readme[:title] if readme

    File.basename(File.expand_path('.'))
  end

  # Attempts to extract project description from README
  #
  # @param docs [Array<Hash>] analyzed file metadata
  # @return [String, nil] detected project description, or nil when missing or empty
  def detect_project_description(docs)
    description = find_readme(docs)&.fetch(:description, nil)
    description unless description.nil? || description.empty?
  end

  # Constructs full URL from path using base_url option if provided
  #
  # @param path [String] relative path to file
  # @return [String] full URL or relative path
  def build_url(path)
    base_url = options[:base_url]
    return path unless base_url

    File.join(base_url, path)
  end
end
+end

data/lib/llms_txt/markdown_transformer.rb ADDED Viewed

@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+module LlmsTxt
+  # Transforms markdown files to be AI-friendly
+  #
+  # Processes individual markdown files to make them more suitable for LLM consumption by
+  # expanding relative links to absolute URLs and converting HTML URLs to markdown-friendly
+  # formats.
+  #
+  # @example Transform with base URL
+  #   transformer = LlmsTxt::MarkdownTransformer.new('README.md',
+  #     base_url: 'https://myproject.io'
+  #   )
+  #   content = transformer.transform
+  #
+  # @api public
class MarkdownTransformer
  # Inline markdown link (or image) of the form [text](target)
  INLINE_LINK = /\[(?<text>[^\]]+)\]\((?<target>[^)]+)\)/

  # Link targets that must not be rewritten: anything carrying a URI scheme
  # (http:, https:, mailto:, tel:, ...), protocol-relative URLs and in-page anchors
  NON_RELATIVE_TARGET = %r{\A(?:[a-z][a-z0-9+.-]*:|//|#)}i

  # Absolute http(s) URL ending in .html/.htm, followed by ")", whitespace or end of line
  HTML_URL = %r{https?://[^\s<>]+\.html?(?=[)\s]|$)}

  # @return [String] path to markdown file
  attr_reader :file_path

  # @return [Hash] transformation options
  attr_reader :options

  # Initialize a new markdown transformer
  #
  # @param file_path [String] path to markdown file to transform
  # @param options [Hash] transformation options
  # @option options [String] :base_url base URL for expanding relative links
  # @option options [Boolean] :convert_urls convert HTML URLs to markdown format
  def initialize(file_path, options = {})
    @file_path = file_path
    @options = options
  end

  # Transform markdown content to be AI-friendly
  #
  # Reads the file and applies, in this order:
  # - expansion of relative links to absolute URLs (if base_url provided)
  # - conversion of .html/.htm URLs to .md (if convert_urls enabled)
  #
  # @return [String] transformed markdown content
  def transform
    content = File.read(file_path)
    content = expand_relative_links(content) if options[:base_url]
    content = convert_html_urls(content) if options[:convert_urls]
    content
  end

  private

  # Expand relative links to absolute URLs
  #
  # Converts markdown links like `[text](./path.md)` to `[text](https://base.url/path.md)`.
  # Targets that already carry a URI scheme (including non-HTTP ones such as `mailto:`),
  # protocol-relative URLs and anchors are left unchanged.
  #
  # @param content [String] markdown content to process
  # @return [String] content with expanded links
  def expand_relative_links(content)
    base_url = options[:base_url]
    content.gsub(INLINE_LINK) do |link|
      match = Regexp.last_match
      target = match[:target]
      next link if target.match?(NON_RELATIVE_TARGET)

      # A leading "./" carries no information once the base URL is prepended
      "[#{match[:text]}](#{File.join(base_url, target.delete_prefix('./'))})"
    end
  end

  # Convert HTML URLs to markdown-friendly format
  #
  # Changes URLs ending in .html or .htm to .md
  #
  # @param content [String] markdown content to process
  # @return [String] content with converted URLs
  def convert_html_urls(content)
    content.gsub(HTML_URL) { |url| url.sub(/\.html?\z/, '.md') }
  end
end
+end

data/lib/llms_txt/parser.rb ADDED Viewed

@@ -0,0 +1,223 @@
+# frozen_string_literal: true
+module LlmsTxt
+  # Parses llms.txt files into structured data
+  #
+  # Reads and parses llms.txt files according to the llms.txt specification,
+  # extracting the title, description, and structured sections (Documentation,
+  # Examples, Optional) with their links.
+  #
+  # @example Parse an llms.txt file
+  #   parser = LlmsTxt::Parser.new('llms.txt')
+  #   parsed = parser.parse
+  #   parsed.title              # => "My Project"
+  #   parsed.description        # => "Project description"
+  #   parsed.documentation_links # => [{title: "README", url: "...", description: "..."}]
+  #
+  # @api public
class Parser
  # One markdown list item holding a link, with an optional ": description" suffix.
  # Deliberately single-line (no /m flag, no newline-crossing classes) so that each list
  # item yields its own match instead of the first description swallowing later items.
  LINK_ITEM = /^[-*][ \t]*\[([^\]\n]+)\]\(([^)\n]+)\)(?::[ \t]*(.*))?[ \t\r]*$/

  # @return [String] path to the llms.txt file
  attr_reader :file_path

  # @return [String] raw content of the llms.txt file
  attr_reader :content

  # Initialize a new parser
  #
  # Reads the whole file eagerly.
  #
  # @param file_path [String] path to the llms.txt file to parse
  def initialize(file_path)
    @file_path = file_path
    @content = File.read(file_path)
  end

  # Parse the llms.txt file
  #
  # Walks the content line by line: the first "# " line (seen before anything else was
  # stored) becomes the title, the first "> " line after the title becomes the
  # description, every "## " line opens a new section keyed by its snake_cased name, and
  # the remaining non-blank lines are collected into the currently open section.
  #
  # @return [ParsedContent] parsed content with title, description, and sections
  def parse
    sections = {}
    current_section = nil
    buffer = []

    content.each_line do |line|
      if line.start_with?('# ')
        save_section(sections, current_section, buffer) if current_section
        sections[:title] = line[2..].strip if sections.empty?
        buffer = []
      elsif line.start_with?('> ') && sections[:title] && !sections[:description]
        sections[:description] = line[2..].strip
      elsif line.start_with?('## ')
        save_section(sections, current_section, buffer) if current_section
        current_section = line[3..].strip.downcase.gsub(/\s+/, '_').to_sym
        buffer = []
      elsif !line.strip.empty?
        buffer << line
      end
    end

    save_section(sections, current_section, buffer) if current_section
    ParsedContent.new(sections)
  end

  private

  # Parses and stores section content in the sections hash
  #
  # Sections without any collected lines are skipped. A later section with the same
  # name replaces the earlier one.
  #
  # @param sections [Hash] accumulator hash for sections
  # @param section_name [Symbol] name of the section
  # @param content [Array<String>] raw content lines
  def save_section(sections, section_name, content)
    return if content.empty?

    sections[section_name] = parse_section_content(content.join)
  end

  # Extracts markdown links from section content into structured format
  #
  # Scans for list items of the form `- [title](url)` or `- [title](url): description`.
  # A link without a description gets an empty string. Returns the raw (stripped)
  # content if no such list item is found.
  #
  # @param content [String] raw section content
  # @return [Array<Hash>, String] array of link hashes or raw content if no links found
  def parse_section_content(content)
    links = content.scan(LINK_ITEM).map do |title, url, description|
      { title: title, url: url, description: description.to_s.strip }
    end
    links.empty? ? content.strip : links
  end
end
+  # Represents parsed llms.txt content with structured access to sections
+  #
+  # Provides convenient access to parsed llms.txt sections including title,
+  # description, and link collections. Can be converted to Hash or XML formats.
+  #
+  # @example Access parsed content
+  #   parsed.title              # => "My Project"
+  #   parsed.description        # => "A description"
+  #   parsed.documentation_links # => [{title: "...", url: "...", description: "..."}]
+  #   parsed.to_h               # => Hash representation
+  #   parsed.to_xml             # => XML string
+  #
+  # @api public
class ParsedContent
  # Characters that must be replaced by entities inside XML text nodes
  XML_ESCAPES = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;',
    "'" => '&apos;'
  }.freeze

  # @return [Hash] the parsed sections hash
  attr_reader :sections

  # Initialize parsed content
  #
  # @param sections [Hash] hash containing parsed sections (:title, :description, :documentation, etc.)
  def initialize(sections)
    @sections = sections
  end

  # Get the project title
  #
  # @return [String, nil] the H1 title or nil if not present
  def title
    sections[:title]
  end

  # Get the project description
  #
  # @return [String, nil] the description blockquote or nil if not present
  def description
    sections[:description]
  end

  # Get documentation links
  #
  # @return [Array<Hash>, String] the stored :documentation section (link hashes with
  #   :title, :url and :description, or raw text), or an empty array when absent
  def documentation_links
    sections[:documentation] || []
  end

  # Get example links
  #
  # @return [Array<Hash>, String] the stored :examples section, or an empty array when absent
  def example_links
    sections[:examples] || []
  end

  # Get optional links
  #
  # @return [Array<Hash>, String] the stored :optional section, or an empty array when absent
  def optional_links
    sections[:optional] || []
  end

  # Convert to hash representation
  #
  # @return [Hash] hash containing all parsed sections (the same object as #sections)
  def to_h
    sections
  end

  # Convert to XML representation
  #
  # Generates an XML document with the title, description and the documentation,
  # examples and optional sections. All text is entity-escaped so that characters such
  # as "&" (common in URL query strings) or "<" cannot produce malformed XML.
  #
  # @return [String] XML string representation
  def to_xml
    builder = ['<?xml version="1.0" encoding="UTF-8"?>', '<llms_context>']
    builder << "  <title>#{escape_xml(title)}</title>" if title
    builder << "  <description>#{escape_xml(description)}</description>" if description
    add_xml_section(builder, 'documentation', documentation_links)
    add_xml_section(builder, 'examples', example_links)
    add_xml_section(builder, 'optional', optional_links)
    builder << '</llms_context>'
    builder.join("\n")
  end

  private

  # Appends section XML elements to builder array
  #
  # Handles both array of link hashes and raw string content; empty sections add nothing
  #
  # @param builder [Array<String>] XML lines accumulator
  # @param name [String] section name
  # @param links [Array<Hash>, String] section links or content
  def add_xml_section(builder, name, links)
    return if links.empty?

    builder << "  <#{name}>"
    if links.is_a?(Array)
      links.each do |link|
        builder << '    <link>'
        builder << "      <title>#{escape_xml(link[:title])}</title>"
        builder << "      <url>#{escape_xml(link[:url])}</url>"
        builder << "      <description>#{escape_xml(link[:description])}</description>"
        builder << '    </link>'
      end
    else
      builder << "    #{escape_xml(links)}"
    end
    builder << "  </#{name}>"
  end

  # Replaces XML-significant characters with their entities
  #
  # @param text [#to_s] value to embed in an XML text node
  # @return [String] escaped text
  def escape_xml(text)
    text.to_s.gsub(/[&<>"']/, XML_ESCAPES)
  end
end
+end