RubyGems - llm-docs-builder - Versions diffs - 0.6.0 → 0.8.0 - Mend

llm-docs-builder 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

checksums.yaml +4 -4
data/.rspec +3 -0
data/CHANGELOG.md +59 -0
data/Gemfile.lock +1 -1
data/README.md +241 -541
data/bin/rspecs +2 -1
data/lib/llm_docs_builder/cli.rb +1 -62
data/lib/llm_docs_builder/comparator.rb +4 -16
data/lib/llm_docs_builder/config.rb +74 -5
data/lib/llm_docs_builder/generator.rb +67 -8
data/lib/llm_docs_builder/markdown_transformer.rb +61 -126
data/lib/llm_docs_builder/output_formatter.rb +93 -0
data/lib/llm_docs_builder/parser.rb +1 -59
data/lib/llm_docs_builder/text_compressor.rb +164 -0
data/lib/llm_docs_builder/token_estimator.rb +52 -0
data/lib/llm_docs_builder/transformers/base_transformer.rb +30 -0
data/lib/llm_docs_builder/transformers/content_cleanup_transformer.rb +106 -0
data/lib/llm_docs_builder/transformers/enhancement_transformer.rb +95 -0
data/lib/llm_docs_builder/transformers/heading_transformer.rb +72 -0
data/lib/llm_docs_builder/transformers/link_transformer.rb +84 -0
data/lib/llm_docs_builder/transformers/whitespace_transformer.rb +44 -0
data/lib/llm_docs_builder/version.rb +1 -1
metadata +11 -3
data/CLAUDE.md +0 -178
data/llm-docs-builder.yml +0 -7

data/bin/rspecs CHANGED

@@ -4,4 +4,5 @@
 set -e
 echo "Running all tests..."
-bundle exec rspec --format documentation
+# Explicitly specify the spec directory to ensure all tests are discovered
+bundle exec rspec spec/ --format documentation

data/lib/llm_docs_builder/cli.rb CHANGED

@@ -295,8 +295,6 @@ module LlmDocsBuilder
         puts "Documentation Links: #{parsed.documentation_links.size}"
         puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
         puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
-      elsif parsed.respond_to?(:to_xml)
-        puts parsed.to_xml
       end
     end
@@ -335,72 +333,13 @@ module LlmDocsBuilder
       begin
         result = comparator.compare
-        display_comparison_results(result)
+        OutputFormatter.display_comparison_results(result)
       rescue LlmDocsBuilder::Errors::BaseError => e
         puts "Error during comparison: #{e.message}"
         exit 1
       end
     end
-    # Display formatted comparison results
-    #
-    # @param result [Hash] comparison results from Comparator
-    def display_comparison_results(result)
-      puts ''
-      puts '=' * 60
-      puts 'Context Window Comparison'
-      puts '=' * 60
-      puts ''
-      puts "Human version:  #{format_bytes(result[:human_size])} (~#{format_number(result[:human_tokens])} tokens)"
-      puts "  Source: #{result[:human_source]}"
-      puts ''
-      puts "AI version:     #{format_bytes(result[:ai_size])} (~#{format_number(result[:ai_tokens])} tokens)"
-      puts "  Source: #{result[:ai_source]}"
-      puts ''
-      puts '-' * 60
-      if result[:reduction_bytes].positive?
-        puts "Reduction:      #{format_bytes(result[:reduction_bytes])} (#{result[:reduction_percent]}%)"
-        puts "Token savings:  #{format_number(result[:token_reduction])} tokens (#{result[:token_reduction_percent]}%)"
-        puts "Factor:         #{result[:factor]}x smaller"
-      elsif result[:reduction_bytes].negative?
-        increase_bytes = result[:reduction_bytes].abs
-        increase_percent = result[:reduction_percent].abs
-        token_increase = result[:token_reduction].abs
-        token_increase_percent = result[:token_reduction_percent].abs
-        puts "Increase:       #{format_bytes(increase_bytes)} (#{increase_percent}%)"
-        puts "Token increase: #{format_number(token_increase)} tokens (#{token_increase_percent}%)"
-        puts "Factor:         #{result[:factor]}x larger"
-      else
-        puts 'Same size'
-      end
-      puts '=' * 60
-      puts ''
-    end
-    # Format bytes into human-readable string
-    #
-    # @param bytes [Integer] number of bytes
-    # @return [String] formatted string with units
-    def format_bytes(bytes)
-      if bytes < 1024
-        "#{bytes} bytes"
-      elsif bytes < 1024 * 1024
-        "#{(bytes / 1024.0).round(1)} KB"
-      else
-        "#{(bytes / (1024.0 * 1024)).round(2)} MB"
-      end
-    end
-    # Format number with comma separators for readability
-    #
-    # @param number [Integer] number to format
-    # @return [String] formatted number with commas
-    def format_number(number)
-      number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
-    end
     # Validate llms.txt file format
     #
     # Checks if llms.txt file follows proper format with title, description, and documentation links.

data/lib/llm_docs_builder/comparator.rb CHANGED

@@ -231,9 +231,10 @@ module LlmDocsBuilder
                  Float::INFINITY
                end
-      # Estimate tokens
-      human_tokens = estimate_tokens(human_content)
-      ai_tokens = estimate_tokens(ai_content)
+      # Estimate tokens using TokenEstimator
+      estimator = TokenEstimator.new
+      human_tokens = estimator.estimate(human_content)
+      ai_tokens = estimator.estimate(ai_content)
       token_reduction = human_tokens - ai_tokens
       token_reduction_percent = if human_tokens.positive?
                                   ((token_reduction.to_f / human_tokens) * 100).round
@@ -256,18 +257,5 @@ module LlmDocsBuilder
       }
     end
-    # Estimate token count using character-based approximation
-    #
-    # Uses the common heuristic that ~4 characters equals 1 token for English text.
-    # This provides reasonable estimates for documentation content without requiring
-    # external tokenizer dependencies.
-    #
-    # @param content [String] text content to estimate tokens for
-    # @return [Integer] estimated number of tokens
-    def estimate_tokens(content)
-      # Use 4 characters per token as a reasonable approximation
-      # This is a common heuristic for English text and works well for documentation
-      (content.length / 4.0).round
-    end
   end
 end

data/lib/llm_docs_builder/config.rb CHANGED

@@ -70,28 +70,97 @@ module LlmDocsBuilder
         remove_comments: if options.key?(:remove_comments)
                            options[:remove_comments]
                          else
-                           self['remove_comments'] || false
+                           self['remove_comments'] || true
                          end,
         normalize_whitespace: if options.key?(:normalize_whitespace)
                                 options[:normalize_whitespace]
                               else
-                                self['normalize_whitespace'] || false
+                                self['normalize_whitespace'] || true
                               end,
         remove_badges: if options.key?(:remove_badges)
                          options[:remove_badges]
                        else
-                         self['remove_badges'] || false
+                         self['remove_badges'] || true
                        end,
         remove_frontmatter: if options.key?(:remove_frontmatter)
                               options[:remove_frontmatter]
                             else
-                              self['remove_frontmatter'] || false
+                              self['remove_frontmatter'] || true
                             end,
         verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
         # Bulk transformation options
         suffix: options[:suffix] || self['suffix'] || '.llm',
         excludes: options[:excludes] || self['excludes'] || [],
-        bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false)
+        bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false),
+        # New compression options
+        remove_code_examples: if options.key?(:remove_code_examples)
+                                options[:remove_code_examples]
+                              else
+                                self['remove_code_examples'] || false
+                              end,
+        remove_images: if options.key?(:remove_images)
+                         options[:remove_images]
+                       else
+                         self['remove_images'] || false
+                       end,
+        simplify_links: if options.key?(:simplify_links)
+                          options[:simplify_links]
+                        else
+                          self['simplify_links'] || false
+                        end,
+        remove_blockquotes: if options.key?(:remove_blockquotes)
+                              options[:remove_blockquotes]
+                            else
+                              self['remove_blockquotes'] || false
+                            end,
+        generate_toc: if options.key?(:generate_toc)
+                        options[:generate_toc]
+                      else
+                        self['generate_toc'] || false
+                      end,
+        custom_instruction: options[:custom_instruction] || self['custom_instruction'],
+        remove_stopwords: if options.key?(:remove_stopwords)
+                            options[:remove_stopwords]
+                          else
+                            self['remove_stopwords'] || false
+                          end,
+        remove_duplicates: if options.key?(:remove_duplicates)
+                             options[:remove_duplicates]
+                           else
+                             self['remove_duplicates'] || false
+                           end,
+        # New RAG enhancement options
+        normalize_headings: if options.key?(:normalize_headings)
+                              options[:normalize_headings]
+                            else
+                              self['normalize_headings'] || false
+                            end,
+        heading_separator: options[:heading_separator] || self['heading_separator'] || ' / ',
+        include_metadata: if options.key?(:include_metadata)
+                            options[:include_metadata]
+                          else
+                            self['include_metadata'] || false
+                          end,
+        include_tokens: if options.key?(:include_tokens)
+                          options[:include_tokens]
+                        else
+                          self['include_tokens'] || false
+                        end,
+        include_timestamps: if options.key?(:include_timestamps)
+                              options[:include_timestamps]
+                            else
+                              self['include_timestamps'] || false
+                            end,
+        include_priority: if options.key?(:include_priority)
+                            options[:include_priority]
+                          else
+                            self['include_priority'] || false
+                          end,
+        calculate_compression: if options.key?(:calculate_compression)
+                                 options[:calculate_compression]
+                               else
+                                 self['calculate_compression'] || false
+                               end
       }
     end

data/lib/llm_docs_builder/generator.rb CHANGED

@@ -88,10 +88,10 @@ module LlmDocsBuilder
     # Extracts metadata from a documentation file
     #
-    # Analyzes file content to extract title, description, and priority
+    # Analyzes file content to extract title, description, priority, and optional metadata
     #
     # @param file_path [String] path to file to analyze
-    # @return [Hash] file metadata with :path, :title, :description, :priority
+    # @return [Hash] file metadata with :path, :title, :description, :priority, :tokens, :updated
     def analyze_file(file_path)
       # Handle single file case differently
       relative_path = if File.file?(docs_path)
@@ -102,12 +102,28 @@ module LlmDocsBuilder
       content = File.read(file_path)
-      {
+      metadata = {
         path: relative_path,
         title: extract_title(content, file_path),
         description: extract_description(content),
         priority: calculate_priority(file_path)
       }
+      # Add optional enhanced metadata
+      if options[:include_metadata]
+        metadata[:tokens] = TokenEstimator.estimate(content) if options[:include_tokens]
+        metadata[:updated] = File.mtime(file_path).strftime('%Y-%m-%d') if options[:include_timestamps]
+        # Calculate compression ratio if transformation is enabled
+        if options[:calculate_compression]
+          transformed = apply_transformations(content, file_path)
+          original_tokens = TokenEstimator.estimate(content)
+          transformed_tokens = TokenEstimator.estimate(transformed)
+          metadata[:compression] = (transformed_tokens.to_f / original_tokens).round(2)
+        end
+      end
+      metadata
     end
     # Extracts title from file content or generates from filename
@@ -164,6 +180,21 @@ module LlmDocsBuilder
       7 # default priority
     end
+    # Applies transformations to content for compression ratio calculation
+    #
+    # @param content [String] original content
+    # @param file_path [String] path to file
+    # @return [String] transformed content
+    def apply_transformations(content, file_path)
+      transformer = MarkdownTransformer.new(file_path, options)
+      # Read file again through transformer to get transformed version
+      transformer.transform
+    rescue StandardError
+      # If transformation fails, return original content
+      content
+    end
     # Constructs llms.txt content from analyzed documentation files
     #
     # Combines title, description, and documentation links into formatted output
@@ -186,11 +217,24 @@ module LlmDocsBuilder
         docs.each do |doc|
           url = build_url(doc[:path])
-          content << if doc[:description] && !doc[:description].empty?
-                       "- [#{doc[:title]}](#{url}): #{doc[:description]}"
-                     else
-                       "- [#{doc[:title]}](#{url})"
-                     end
+          line = if doc[:description] && !doc[:description].empty?
+                   "- [#{doc[:title]}](#{url}): #{doc[:description]}"
+                 else
+                   "- [#{doc[:title]}](#{url})"
+                 end
+          # Append metadata if enabled
+          if options[:include_metadata]
+            metadata_parts = []
+            metadata_parts << "tokens:#{doc[:tokens]}" if doc[:tokens]
+            metadata_parts << "compression:#{doc[:compression]}" if doc[:compression]
+            metadata_parts << "updated:#{doc[:updated]}" if doc[:updated]
+            metadata_parts << priority_label(doc[:priority]) if options[:include_priority]
+            line += " #{metadata_parts.join(' ')}" unless metadata_parts.empty?
+          end
+          content << line
         end
       end
@@ -230,5 +274,20 @@ module LlmDocsBuilder
         path
       end
     end
+    # Converts numeric priority to human-readable label
+    #
+    # @param priority [Integer] priority value (1-7)
+    # @return [String] priority label (high, medium, low)
+    def priority_label(priority)
+      case priority
+      when 1..2
+        'priority:high'
+      when 3..5
+        'priority:medium'
+      when 6..7
+        'priority:low'
+      end
+    end
   end
 end

data/lib/llm_docs_builder/markdown_transformer.rb CHANGED

@@ -3,9 +3,8 @@
 module LlmDocsBuilder
   # Transforms markdown files to be AI-friendly
   #
-  # Processes individual markdown files to make them more suitable for LLM consumption by
-  # expanding relative links to absolute URLs and converting HTML URLs to markdown-friendly
-  # formats.
+  # Orchestrates a pipeline of specialized transformers to process markdown content.
+  # Each transformer is responsible for a specific aspect of the transformation.
   #
   # @example Transform with base URL
   #   transformer = LlmDocsBuilder::MarkdownTransformer.new('README.md',
@@ -31,163 +30,99 @@ module LlmDocsBuilder
     # @option options [Boolean] :normalize_whitespace normalize excessive whitespace
     # @option options [Boolean] :remove_badges remove badge/shield images
     # @option options [Boolean] :remove_frontmatter remove YAML/TOML frontmatter
+    # @option options [Boolean] :remove_code_examples remove code blocks and inline code
+    # @option options [Boolean] :remove_images remove image syntax
+    # @option options [Boolean] :simplify_links shorten verbose link text
+    # @option options [Boolean] :remove_blockquotes remove blockquote formatting
+    # @option options [Boolean] :generate_toc generate table of contents at the top
+    # @option options [String] :custom_instruction custom instruction text to inject at top
+    # @option options [Boolean] :remove_stopwords remove common stopwords (aggressive)
+    # @option options [Boolean] :remove_duplicates remove duplicate paragraphs
     def initialize(file_path, options = {})
       @file_path = file_path
       @options = options
     end
-    # Transform markdown content to be AI-friendly
+    # Transform markdown content using a pipeline of transformers
     #
-    # Applies transformations to make the markdown more suitable for LLM processing:
-    # - Removes YAML/TOML frontmatter (if remove_frontmatter enabled)
-    # - Expands relative links to absolute URLs (if base_url provided)
-    # - Converts HTML URLs to markdown format (if convert_urls enabled)
-    # - Removes HTML comments (if remove_comments enabled)
-    # - Removes badge/shield images (if remove_badges enabled)
-    # - Normalizes excessive whitespace (if normalize_whitespace enabled)
+    # Processes content through specialized transformers in order:
+    # 1. ContentCleanupTransformer - Removes unwanted elements
+    # 2. LinkTransformer - Processes links
+    # 3. HeadingTransformer - Normalizes heading hierarchy (if enabled)
+    # 4. TextCompressor - Advanced compression (if enabled)
+    # 5. EnhancementTransformer - Adds TOC and instructions
+    # 6. WhitespaceTransformer - Normalizes whitespace
     #
     # @return [String] transformed markdown content
     def transform
       content = File.read(file_path)
-      # Remove frontmatter first (before any other processing)
-      content = remove_frontmatter(content) if options[:remove_frontmatter]
-      # Link transformations
-      content = expand_relative_links(content) if options[:base_url]
-      content = convert_html_urls(content) if options[:convert_urls]
-      # Content cleanup
-      content = remove_comments(content) if options[:remove_comments]
-      content = remove_badges(content) if options[:remove_badges]
-      # Whitespace normalization last (after all other transformations)
-      content = normalize_whitespace(content) if options[:normalize_whitespace]
+      # Build and execute transformation pipeline
+      content = cleanup_transformer.transform(content, options)
+      content = link_transformer.transform(content, options)
+      content = heading_transformer.transform(content, options)
+      content = compress_content(content) if should_compress?
+      content = enhancement_transformer.transform(content, options)
+      content = whitespace_transformer.transform(content, options)
       content
     end
     private
-    # Expand relative links to absolute URLs
+    # Get content cleanup transformer instance
     #
-    # Converts markdown links like `[text](./path.md)` to `[text](https://base.url/path.md)`.
-    # Leaves absolute URLs and anchors unchanged.
-    #
-    # @param content [String] markdown content to process
-    # @return [String] content with expanded links
-    def expand_relative_links(content)
-      base_url = options[:base_url]
-      content.gsub(/\[([^\]]+)\]\(([^)]+)\)/) do |match|
-        text = ::Regexp.last_match(1)
-        url = ::Regexp.last_match(2)
-        if url.start_with?('http://', 'https://', '//', '#')
-          match # Already absolute or anchor
-        else
-          # Clean up relative path
-          clean_url = url.gsub(%r{^\./}, '') # Remove leading './'
-          expanded_url = File.join(base_url, clean_url)
-          "[#{text}](#{expanded_url})"
-        end
-      end
+    # @return [Transformers::ContentCleanupTransformer]
+    def cleanup_transformer
+      @cleanup_transformer ||= Transformers::ContentCleanupTransformer.new
     end
-    # Convert HTML URLs to markdown-friendly format
+    # Get link transformer instance
     #
-    # Changes URLs ending in .html or .htm to .md for better LLM understanding
-    #
-    # @param content [String] markdown content to process
-    # @return [String] content with converted URLs
-    def convert_html_urls(content)
-      content.gsub(%r{https?://[^\s<>]+\.html?(?=[)\s]|$)}) do |url|
-        url.sub(/\.html?$/, '.md')
-      end
+    # @return [Transformers::LinkTransformer]
+    def link_transformer
+      @link_transformer ||= Transformers::LinkTransformer.new
     end
-    # Remove HTML comments from markdown content
-    #
-    # Strips out HTML comments (<!-- ... -->) which are typically metadata for developers
-    # and not relevant for LLM consumption. This reduces token usage and improves clarity.
-    #
-    # Handles:
-    # - Single-line comments: <!-- comment -->
-    # - Multi-line comments spanning multiple lines
-    # - Multiple comments in the same content
+    # Get heading transformer instance
     #
-    # @param content [String] markdown content to process
-    # @return [String] content with comments removed
-    def remove_comments(content)
-      # Remove HTML comments (single and multi-line)
-      # The .*? makes it non-greedy so it stops at the first -->
-      content.gsub(/<!--.*?-->/m, '')
+    # @return [Transformers::HeadingTransformer]
+    def heading_transformer
+      @heading_transformer ||= Transformers::HeadingTransformer.new
     end
-    # Remove badge and shield images from markdown
+    # Get enhancement transformer instance
     #
-    # Strips out badge/shield images (typically from shields.io, badge.fury.io, etc.)
-    # which are visual indicators for humans but provide no value to LLMs.
-    #
-    # Recognizes common patterns:
-    # - [![Badge](badge.svg)](link) (linked badges)
-    # - ![Badge](badge.svg) (unlinked badges)
-    # - Common badge domains: shields.io, badge.fury.io, travis-ci.org, etc.
-    #
-    # @param content [String] markdown content to process
-    # @return [String] content with badges removed
-    def remove_badges(content)
-      # Remove linked badges: [![...](badge-url)](link-url)
-      content = content.gsub(/\[\!\[([^\]]*)\]\([^\)]*(?:badge|shield|svg|travis|coveralls|fury)[^\)]*\)\]\([^\)]*\)/i, '')
-      # Remove standalone badges: ![...](badge-url)
-      content = content.gsub(/!\[([^\]]*)\]\([^\)]*(?:badge|shield|svg|travis|coveralls|fury)[^\)]*\)/i, '')
-      content
+    # @return [Transformers::EnhancementTransformer]
+    def enhancement_transformer
+      @enhancement_transformer ||= Transformers::EnhancementTransformer.new
     end
-    # Remove YAML or TOML frontmatter from markdown
-    #
-    # Strips out frontmatter blocks which are metadata used by static site generators
-    # (Jekyll, Hugo, etc.) but not relevant for LLM consumption.
+    # Get whitespace transformer instance
     #
-    # Recognizes:
-    # - YAML frontmatter: --- ... ---
-    # - TOML frontmatter: +++ ... +++
-    #
-    # @param content [String] markdown content to process
-    # @return [String] content with frontmatter removed
-    def remove_frontmatter(content)
-      # Remove YAML frontmatter (--- ... ---)
-      content = content.sub(/\A---\s*$.*?^---\s*$/m, '')
-      # Remove TOML frontmatter (+++ ... +++)
-      content = content.sub(/\A\+\+\+\s*$.*?^\+\+\+\s*$/m, '')
-      content
+    # @return [Transformers::WhitespaceTransformer]
+    def whitespace_transformer
+      @whitespace_transformer ||= Transformers::WhitespaceTransformer.new
     end
-    # Normalize excessive whitespace in markdown
-    #
-    # Reduces excessive blank lines and trailing whitespace to make content more compact
-    # for LLM consumption without affecting readability.
-    #
-    # Transformations:
-    # - Multiple consecutive blank lines (3+) → 2 blank lines max
-    # - Trailing whitespace on lines → removed
-    # - Leading/trailing whitespace in file → trimmed
+    # Check if content compression should be applied
     #
-    # @param content [String] markdown content to process
-    # @return [String] content with normalized whitespace
-    def normalize_whitespace(content)
-      # Remove trailing whitespace from each line
-      content = content.gsub(/ +$/, '')
-      # Reduce multiple consecutive blank lines to maximum of 2
-      content = content.gsub(/\n{4,}/, "\n\n\n")
+    # @return [Boolean]
+    def should_compress?
+      options[:remove_stopwords] || options[:remove_duplicates]
+    end
-      # Trim leading and trailing whitespace from the entire content
-      content.strip
+    # Compress content using TextCompressor
+    #
+    # @param content [String] content to compress
+    # @return [String] compressed content
+    def compress_content(content)
+      compressor = TextCompressor.new
+      compression_methods = {
+        remove_stopwords: options[:remove_stopwords],
+        remove_duplicates: options[:remove_duplicates]
+      }
+      compressor.compress(content, compression_methods)
     end
   end
 end

data/lib/llm_docs_builder/output_formatter.rb ADDED

@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+module LlmDocsBuilder
+  # Formats output for CLI display
+  #
+  # Provides formatting utilities for displaying comparison results,
+  # byte sizes, and numbers in a user-friendly way.
+  #
+  # @api private
+  class OutputFormatter
+    # Format bytes into human-readable string
+    #
+    # @param bytes [Integer] number of bytes
+    # @return [String] formatted string with units (bytes/KB/MB)
+    #
+    # @example
+    #   OutputFormatter.format_bytes(1024)      #=> "1.0 KB"
+    #   OutputFormatter.format_bytes(1048576)   #=> "1.0 MB"
+    def self.format_bytes(bytes)
+      if bytes < 1024
+        "#{bytes} bytes"
+      elsif bytes < 1024 * 1024
+        "#{(bytes / 1024.0).round(1)} KB"
+      else
+        "#{(bytes / (1024.0 * 1024)).round(2)} MB"
+      end
+    end
+    # Format number with comma separators for readability
+    #
+    # @param number [Integer] number to format
+    # @return [String] formatted number with commas
+    #
+    # @example
+    #   OutputFormatter.format_number(1234567)  #=> "1,234,567"
+    def self.format_number(number)
+      number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
+    end
+    # Display formatted comparison results
+    #
+    # @param result [Hash] comparison results from Comparator
+    def self.display_comparison_results(result)
+      puts ''
+      puts '=' * 60
+      puts 'Context Window Comparison'
+      puts '=' * 60
+      puts ''
+      puts "Human version:  #{format_bytes(result[:human_size])} (~#{format_number(result[:human_tokens])} tokens)"
+      puts "  Source: #{result[:human_source]}"
+      puts ''
+      puts "AI version:     #{format_bytes(result[:ai_size])} (~#{format_number(result[:ai_tokens])} tokens)"
+      puts "  Source: #{result[:ai_source]}"
+      puts ''
+      puts '-' * 60
+      if result[:reduction_bytes].positive?
+        display_reduction(result)
+      elsif result[:reduction_bytes].negative?
+        display_increase(result)
+      else
+        puts 'Same size'
+      end
+      puts '=' * 60
+      puts ''
+    end
+    # Display reduction statistics
+    #
+    # @param result [Hash] comparison results
+    # @api private
+    def self.display_reduction(result)
+      puts "Reduction:      #{format_bytes(result[:reduction_bytes])} (#{result[:reduction_percent]}%)"
+      puts "Token savings:  #{format_number(result[:token_reduction])} tokens (#{result[:token_reduction_percent]}%)"
+      puts "Factor:         #{result[:factor]}x smaller"
+    end
+    # Display increase statistics
+    #
+    # @param result [Hash] comparison results
+    # @api private
+    def self.display_increase(result)
+      increase_bytes = result[:reduction_bytes].abs
+      increase_percent = result[:reduction_percent].abs
+      token_increase = result[:token_reduction].abs
+      token_increase_percent = result[:token_reduction_percent].abs
+      puts "Increase:       #{format_bytes(increase_bytes)} (#{increase_percent}%)"
+      puts "Token increase: #{format_number(token_increase)} tokens (#{token_increase_percent}%)"
+      puts "Factor:         #{result[:factor]}x larger"
+    end
+  end
+end