llm-docs-builder 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/rspecs CHANGED
@@ -4,4 +4,5 @@
 set -e
 
 echo "Running all tests..."
-bundle exec rspec --format documentation
+# Explicitly specify the spec directory to ensure all tests are discovered
+bundle exec rspec spec/ --format documentation
@@ -295,8 +295,6 @@ module LlmDocsBuilder
         puts "Documentation Links: #{parsed.documentation_links.size}"
         puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
         puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
-      elsif parsed.respond_to?(:to_xml)
-        puts parsed.to_xml
       end
     end
 
@@ -335,72 +333,13 @@ module LlmDocsBuilder
 
      begin
        result = comparator.compare
-       display_comparison_results(result)
+       OutputFormatter.display_comparison_results(result)
      rescue LlmDocsBuilder::Errors::BaseError => e
        puts "Error during comparison: #{e.message}"
        exit 1
      end
    end
 
-   # Display formatted comparison results
-   #
-   # @param result [Hash] comparison results from Comparator
-   def display_comparison_results(result)
-     puts ''
-     puts '=' * 60
-     puts 'Context Window Comparison'
-     puts '=' * 60
-     puts ''
-     puts "Human version: #{format_bytes(result[:human_size])} (~#{format_number(result[:human_tokens])} tokens)"
-     puts "  Source: #{result[:human_source]}"
-     puts ''
-     puts "AI version: #{format_bytes(result[:ai_size])} (~#{format_number(result[:ai_tokens])} tokens)"
-     puts "  Source: #{result[:ai_source]}"
-     puts ''
-     puts '-' * 60
-
-     if result[:reduction_bytes].positive?
-       puts "Reduction: #{format_bytes(result[:reduction_bytes])} (#{result[:reduction_percent]}%)"
-       puts "Token savings: #{format_number(result[:token_reduction])} tokens (#{result[:token_reduction_percent]}%)"
-       puts "Factor: #{result[:factor]}x smaller"
-     elsif result[:reduction_bytes].negative?
-       increase_bytes = result[:reduction_bytes].abs
-       increase_percent = result[:reduction_percent].abs
-       token_increase = result[:token_reduction].abs
-       token_increase_percent = result[:token_reduction_percent].abs
-       puts "Increase: #{format_bytes(increase_bytes)} (#{increase_percent}%)"
-       puts "Token increase: #{format_number(token_increase)} tokens (#{token_increase_percent}%)"
-       puts "Factor: #{result[:factor]}x larger"
-     else
-       puts 'Same size'
-     end
-
-     puts '=' * 60
-     puts ''
-   end
-
-   # Format bytes into human-readable string
-   #
-   # @param bytes [Integer] number of bytes
-   # @return [String] formatted string with units
-   def format_bytes(bytes)
-     if bytes < 1024
-       "#{bytes} bytes"
-     elsif bytes < 1024 * 1024
-       "#{(bytes / 1024.0).round(1)} KB"
-     else
-       "#{(bytes / (1024.0 * 1024)).round(2)} MB"
-     end
-   end
-
-   # Format number with comma separators for readability
-   #
-   # @param number [Integer] number to format
-   # @return [String] formatted number with commas
-   def format_number(number)
-     number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
-   end
-
    # Validate llms.txt file format
    #
    # Checks if llms.txt file follows proper format with title, description, and documentation links.
@@ -231,9 +231,10 @@ module LlmDocsBuilder
                                Float::INFINITY
                              end
 
-     # Estimate tokens
-     human_tokens = estimate_tokens(human_content)
-     ai_tokens = estimate_tokens(ai_content)
+     # Estimate tokens using TokenEstimator
+     estimator = TokenEstimator.new
+     human_tokens = estimator.estimate(human_content)
+     ai_tokens = estimator.estimate(ai_content)
      token_reduction = human_tokens - ai_tokens
      token_reduction_percent = if human_tokens.positive?
                                  ((token_reduction.to_f / human_tokens) * 100).round
@@ -256,18 +257,5 @@ module LlmDocsBuilder
      }
    end
 
-   # Estimate token count using character-based approximation
-   #
-   # Uses the common heuristic that ~4 characters equals 1 token for English text.
-   # This provides reasonable estimates for documentation content without requiring
-   # external tokenizer dependencies.
-   #
-   # @param content [String] text content to estimate tokens for
-   # @return [Integer] estimated number of tokens
-   def estimate_tokens(content)
-     # Use 4 characters per token as a reasonable approximation
-     # This is a common heuristic for English text and works well for documentation
-     (content.length / 4.0).round
-   end
  end
end
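
The extracted TokenEstimator class itself does not appear in this diff. A minimal sketch consistent with both call sites (instance-style `TokenEstimator.new.estimate` here in the Comparator, class-style `TokenEstimator.estimate` in the Generator hunks below), reusing the ~4 characters per token heuristic documented in the removed method; the constant name and the class-method forwarding are assumptions:

    module LlmDocsBuilder
      # Hypothetical reconstruction -- the released class may differ in detail.
      class TokenEstimator
        CHARS_PER_TOKEN = 4.0 # same heuristic the removed estimate_tokens used

        # Instance form, as called by Comparator
        def estimate(content)
          (content.length / CHARS_PER_TOKEN).round
        end

        # Class-level convenience form, as called by Generator
        def self.estimate(content)
          new.estimate(content)
        end
      end
    end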
@@ -70,28 +70,97 @@ module LlmDocsBuilder
       remove_comments: if options.key?(:remove_comments)
                          options[:remove_comments]
                        else
-                         self['remove_comments'] || false
+                         self['remove_comments'] || true
                        end,
       normalize_whitespace: if options.key?(:normalize_whitespace)
                               options[:normalize_whitespace]
                             else
-                              self['normalize_whitespace'] || false
+                              self['normalize_whitespace'] || true
                             end,
       remove_badges: if options.key?(:remove_badges)
                        options[:remove_badges]
                      else
-                       self['remove_badges'] || false
+                       self['remove_badges'] || true
                      end,
       remove_frontmatter: if options.key?(:remove_frontmatter)
                             options[:remove_frontmatter]
                           else
-                            self['remove_frontmatter'] || false
+                            self['remove_frontmatter'] || true
                           end,
       verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
       # Bulk transformation options
       suffix: options[:suffix] || self['suffix'] || '.llm',
       excludes: options[:excludes] || self['excludes'] || [],
-      bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false)
+      bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false),
+      # New compression options
+      remove_code_examples: if options.key?(:remove_code_examples)
+                              options[:remove_code_examples]
+                            else
+                              self['remove_code_examples'] || false
+                            end,
+      remove_images: if options.key?(:remove_images)
+                       options[:remove_images]
+                     else
+                       self['remove_images'] || false
+                     end,
+      simplify_links: if options.key?(:simplify_links)
+                        options[:simplify_links]
+                      else
+                        self['simplify_links'] || false
+                      end,
+      remove_blockquotes: if options.key?(:remove_blockquotes)
+                            options[:remove_blockquotes]
+                          else
+                            self['remove_blockquotes'] || false
+                          end,
+      generate_toc: if options.key?(:generate_toc)
+                      options[:generate_toc]
+                    else
+                      self['generate_toc'] || false
+                    end,
+      custom_instruction: options[:custom_instruction] || self['custom_instruction'],
+      remove_stopwords: if options.key?(:remove_stopwords)
+                          options[:remove_stopwords]
+                        else
+                          self['remove_stopwords'] || false
+                        end,
+      remove_duplicates: if options.key?(:remove_duplicates)
+                           options[:remove_duplicates]
+                         else
+                           self['remove_duplicates'] || false
+                         end,
+      # New RAG enhancement options
+      normalize_headings: if options.key?(:normalize_headings)
+                            options[:normalize_headings]
+                          else
+                            self['normalize_headings'] || false
+                          end,
+      heading_separator: options[:heading_separator] || self['heading_separator'] || ' / ',
+      include_metadata: if options.key?(:include_metadata)
+                          options[:include_metadata]
+                        else
+                          self['include_metadata'] || false
+                        end,
+      include_tokens: if options.key?(:include_tokens)
+                        options[:include_tokens]
+                      else
+                        self['include_tokens'] || false
+                      end,
+      include_timestamps: if options.key?(:include_timestamps)
+                            options[:include_timestamps]
+                          else
+                            self['include_timestamps'] || false
+                          end,
+      include_priority: if options.key?(:include_priority)
+                          options[:include_priority]
+                        else
+                          self['include_priority'] || false
+                        end,
+      calculate_compression: if options.key?(:calculate_compression)
+                               options[:calculate_compression]
+                             else
+                               self['calculate_compression'] || false
+                             end
     }
   end
 
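One behavioral wrinkle in the flipped defaults above: `self['remove_comments'] || true` evaluates to true even when the config file explicitly sets the key to false, since `false || true` is true in Ruby, so these four options can no longer be disabled from the config file, only via CLI options. A default that still honors an explicit false needs a key-presence check; a minimal sketch of that pattern (`config_flag` is a hypothetical helper, not part of the gem, and it assumes the config object exposes Hash-style `[]` and `key?` lookups):

    # Hypothetical helper -- CLI option wins, then an explicit config value
    # (even false), then the default.
    def config_flag(options, key, default)
      return options[key] if options.key?(key)
      return self[key.to_s] if key?(key.to_s)

      default
    end

    # Usage sketch:
    # remove_comments: config_flag(options, :remove_comments, true)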
@@ -88,10 +88,10 @@ module LlmDocsBuilder
 
    # Extracts metadata from a documentation file
    #
-   # Analyzes file content to extract title, description, and priority
+   # Analyzes file content to extract title, description, priority, and optional metadata
    #
    # @param file_path [String] path to file to analyze
-   # @return [Hash] file metadata with :path, :title, :description, :priority
+   # @return [Hash] file metadata with :path, :title, :description, :priority, :tokens, :updated
    def analyze_file(file_path)
      # Handle single file case differently
      relative_path = if File.file?(docs_path)
@@ -102,12 +102,28 @@ module LlmDocsBuilder
 
      content = File.read(file_path)
 
-     {
+     metadata = {
        path: relative_path,
        title: extract_title(content, file_path),
        description: extract_description(content),
        priority: calculate_priority(file_path)
      }
+
+     # Add optional enhanced metadata
+     if options[:include_metadata]
+       metadata[:tokens] = TokenEstimator.estimate(content) if options[:include_tokens]
+       metadata[:updated] = File.mtime(file_path).strftime('%Y-%m-%d') if options[:include_timestamps]
+
+       # Calculate compression ratio if transformation is enabled
+       if options[:calculate_compression]
+         transformed = apply_transformations(content, file_path)
+         original_tokens = TokenEstimator.estimate(content)
+         transformed_tokens = TokenEstimator.estimate(transformed)
+         metadata[:compression] = (transformed_tokens.to_f / original_tokens).round(2)
+       end
+     end
+
+     metadata
    end
 
    # Extracts title from file content or generates from filename
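
With include_metadata, include_tokens, include_timestamps, and calculate_compression all enabled, the analyze_file method above returns a hash along these lines (path, title, and values are illustrative):

    {
      path: 'guides/installation.md',
      title: 'Installation',
      description: 'How to install and configure the gem',
      priority: 2,
      tokens: 1250,          # TokenEstimator.estimate(content)
      updated: '2025-01-15', # File.mtime formatted as %Y-%m-%d
      compression: 0.45      # transformed tokens / original tokens
    }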
@@ -164,6 +180,21 @@ module LlmDocsBuilder
      7 # default priority
    end
 
+   # Applies transformations to content for compression ratio calculation
+   #
+   # @param content [String] original content
+   # @param file_path [String] path to file
+   # @return [String] transformed content
+   def apply_transformations(content, file_path)
+     transformer = MarkdownTransformer.new(file_path, options)
+
+     # Read file again through transformer to get transformed version
+     transformer.transform
+   rescue StandardError
+     # If transformation fails, return original content
+     content
+   end
+
    # Constructs llms.txt content from analyzed documentation files
    #
    # Combines title, description, and documentation links into formatted output
@@ -186,11 +217,24 @@ module LlmDocsBuilder
 
      docs.each do |doc|
        url = build_url(doc[:path])
-       content << if doc[:description] && !doc[:description].empty?
-                    "- [#{doc[:title]}](#{url}): #{doc[:description]}"
-                  else
-                    "- [#{doc[:title]}](#{url})"
-                  end
+       line = if doc[:description] && !doc[:description].empty?
+                "- [#{doc[:title]}](#{url}): #{doc[:description]}"
+              else
+                "- [#{doc[:title]}](#{url})"
+              end
+
+       # Append metadata if enabled
+       if options[:include_metadata]
+         metadata_parts = []
+         metadata_parts << "tokens:#{doc[:tokens]}" if doc[:tokens]
+         metadata_parts << "compression:#{doc[:compression]}" if doc[:compression]
+         metadata_parts << "updated:#{doc[:updated]}" if doc[:updated]
+         metadata_parts << priority_label(doc[:priority]) if options[:include_priority]
+
+         line += " #{metadata_parts.join(' ')}" unless metadata_parts.empty?
+       end
+
+       content << line
      end
    end
 
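Grounded in the construction above, an llms.txt entry with all metadata options enabled comes out as a single line; the title, URL, and values here are illustrative:

    - [Getting Started](https://example.com/docs/getting-started.md): Quick setup guide tokens:1250 compression:0.45 updated:2025-01-15 priority:high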
@@ -230,5 +274,20 @@ module LlmDocsBuilder
        path
      end
    end
+
+   # Converts numeric priority to human-readable label
+   #
+   # @param priority [Integer] priority value (1-7)
+   # @return [String] priority label (high, medium, low)
+   def priority_label(priority)
+     case priority
+     when 1..2
+       'priority:high'
+     when 3..5
+       'priority:medium'
+     when 6..7
+       'priority:low'
+     end
+   end
  end
end
@@ -3,9 +3,8 @@
 module LlmDocsBuilder
  # Transforms markdown files to be AI-friendly
  #
- # Processes individual markdown files to make them more suitable for LLM consumption by
- # expanding relative links to absolute URLs and converting HTML URLs to markdown-friendly
- # formats.
+ # Orchestrates a pipeline of specialized transformers to process markdown content.
+ # Each transformer is responsible for a specific aspect of the transformation.
  #
  # @example Transform with base URL
  #   transformer = LlmDocsBuilder::MarkdownTransformer.new('README.md',
@@ -31,163 +30,99 @@ module LlmDocsBuilder
    # @option options [Boolean] :normalize_whitespace normalize excessive whitespace
    # @option options [Boolean] :remove_badges remove badge/shield images
    # @option options [Boolean] :remove_frontmatter remove YAML/TOML frontmatter
+   # @option options [Boolean] :remove_code_examples remove code blocks and inline code
+   # @option options [Boolean] :remove_images remove image syntax
+   # @option options [Boolean] :simplify_links shorten verbose link text
+   # @option options [Boolean] :remove_blockquotes remove blockquote formatting
+   # @option options [Boolean] :generate_toc generate table of contents at the top
+   # @option options [String] :custom_instruction custom instruction text to inject at top
+   # @option options [Boolean] :remove_stopwords remove common stopwords (aggressive)
+   # @option options [Boolean] :remove_duplicates remove duplicate paragraphs
    def initialize(file_path, options = {})
      @file_path = file_path
      @options = options
    end
 
-   # Transform markdown content to be AI-friendly
+   # Transform markdown content using a pipeline of transformers
    #
-   # Applies transformations to make the markdown more suitable for LLM processing:
-   # - Removes YAML/TOML frontmatter (if remove_frontmatter enabled)
-   # - Expands relative links to absolute URLs (if base_url provided)
-   # - Converts HTML URLs to markdown format (if convert_urls enabled)
-   # - Removes HTML comments (if remove_comments enabled)
-   # - Removes badge/shield images (if remove_badges enabled)
-   # - Normalizes excessive whitespace (if normalize_whitespace enabled)
+   # Processes content through specialized transformers in order:
+   # 1. ContentCleanupTransformer - Removes unwanted elements
+   # 2. LinkTransformer - Processes links
+   # 3. HeadingTransformer - Normalizes heading hierarchy (if enabled)
+   # 4. TextCompressor - Advanced compression (if enabled)
+   # 5. EnhancementTransformer - Adds TOC and instructions
+   # 6. WhitespaceTransformer - Normalizes whitespace
    #
    # @return [String] transformed markdown content
    def transform
      content = File.read(file_path)
 
-     # Remove frontmatter first (before any other processing)
-     content = remove_frontmatter(content) if options[:remove_frontmatter]
-
-     # Link transformations
-     content = expand_relative_links(content) if options[:base_url]
-     content = convert_html_urls(content) if options[:convert_urls]
-
-     # Content cleanup
-     content = remove_comments(content) if options[:remove_comments]
-     content = remove_badges(content) if options[:remove_badges]
-
-     # Whitespace normalization last (after all other transformations)
-     content = normalize_whitespace(content) if options[:normalize_whitespace]
+     # Build and execute transformation pipeline
+     content = cleanup_transformer.transform(content, options)
+     content = link_transformer.transform(content, options)
+     content = heading_transformer.transform(content, options)
+     content = compress_content(content) if should_compress?
+     content = enhancement_transformer.transform(content, options)
+     content = whitespace_transformer.transform(content, options)
 
      content
    end
 
    private
 
-   # Expand relative links to absolute URLs
+   # Get content cleanup transformer instance
    #
-   # Converts markdown links like `[text](./path.md)` to `[text](https://base.url/path.md)`.
-   # Leaves absolute URLs and anchors unchanged.
-   #
-   # @param content [String] markdown content to process
-   # @return [String] content with expanded links
-   def expand_relative_links(content)
-     base_url = options[:base_url]
-
-     content.gsub(/\[([^\]]+)\]\(([^)]+)\)/) do |match|
-       text = ::Regexp.last_match(1)
-       url = ::Regexp.last_match(2)
-
-       if url.start_with?('http://', 'https://', '//', '#')
-         match # Already absolute or anchor
-       else
-         # Clean up relative path
-         clean_url = url.gsub(%r{^\./}, '') # Remove leading './'
-         expanded_url = File.join(base_url, clean_url)
-         "[#{text}](#{expanded_url})"
-       end
-     end
+   # @return [Transformers::ContentCleanupTransformer]
+   def cleanup_transformer
+     @cleanup_transformer ||= Transformers::ContentCleanupTransformer.new
    end
 
-   # Convert HTML URLs to markdown-friendly format
+   # Get link transformer instance
    #
-   # Changes URLs ending in .html or .htm to .md for better LLM understanding
-   #
-   # @param content [String] markdown content to process
-   # @return [String] content with converted URLs
-   def convert_html_urls(content)
-     content.gsub(%r{https?://[^\s<>]+\.html?(?=[)\s]|$)}) do |url|
-       url.sub(/\.html?$/, '.md')
-     end
+   # @return [Transformers::LinkTransformer]
+   def link_transformer
+     @link_transformer ||= Transformers::LinkTransformer.new
    end
 
-   # Remove HTML comments from markdown content
-   #
-   # Strips out HTML comments (<!-- ... -->) which are typically metadata for developers
-   # and not relevant for LLM consumption. This reduces token usage and improves clarity.
-   #
-   # Handles:
-   # - Single-line comments: <!-- comment -->
-   # - Multi-line comments spanning multiple lines
-   # - Multiple comments in the same content
+   # Get heading transformer instance
    #
-   # @param content [String] markdown content to process
-   # @return [String] content with comments removed
-   def remove_comments(content)
-     # Remove HTML comments (single and multi-line)
-     # The .*? makes it non-greedy so it stops at the first -->
-     content.gsub(/<!--.*?-->/m, '')
+   # @return [Transformers::HeadingTransformer]
+   def heading_transformer
+     @heading_transformer ||= Transformers::HeadingTransformer.new
    end
 
-   # Remove badge and shield images from markdown
+   # Get enhancement transformer instance
    #
-   # Strips out badge/shield images (typically from shields.io, badge.fury.io, etc.)
-   # which are visual indicators for humans but provide no value to LLMs.
-   #
-   # Recognizes common patterns:
-   # - [![Badge](badge.svg)](link) (linked badges)
-   # - ![Badge](badge.svg) (unlinked badges)
-   # - Common badge domains: shields.io, badge.fury.io, travis-ci.org, etc.
-   #
-   # @param content [String] markdown content to process
-   # @return [String] content with badges removed
-   def remove_badges(content)
-     # Remove linked badges: [![...](badge-url)](link-url)
-     content = content.gsub(/\[\!\[([^\]]*)\]\([^\)]*(?:badge|shield|svg|travis|coveralls|fury)[^\)]*\)\]\([^\)]*\)/i, '')
-
-     # Remove standalone badges: ![...](badge-url)
-     content = content.gsub(/!\[([^\]]*)\]\([^\)]*(?:badge|shield|svg|travis|coveralls|fury)[^\)]*\)/i, '')
-
-     content
+   # @return [Transformers::EnhancementTransformer]
+   def enhancement_transformer
+     @enhancement_transformer ||= Transformers::EnhancementTransformer.new
    end
 
-   # Remove YAML or TOML frontmatter from markdown
-   #
-   # Strips out frontmatter blocks which are metadata used by static site generators
-   # (Jekyll, Hugo, etc.) but not relevant for LLM consumption.
+   # Get whitespace transformer instance
    #
-   # Recognizes:
-   # - YAML frontmatter: --- ... ---
-   # - TOML frontmatter: +++ ... +++
-   #
-   # @param content [String] markdown content to process
-   # @return [String] content with frontmatter removed
-   def remove_frontmatter(content)
-     # Remove YAML frontmatter (--- ... ---)
-     content = content.sub(/\A---\s*$.*?^---\s*$/m, '')
-
-     # Remove TOML frontmatter (+++ ... +++)
-     content = content.sub(/\A\+\+\+\s*$.*?^\+\+\+\s*$/m, '')
-
-     content
+   # @return [Transformers::WhitespaceTransformer]
+   def whitespace_transformer
+     @whitespace_transformer ||= Transformers::WhitespaceTransformer.new
    end
 
-   # Normalize excessive whitespace in markdown
-   #
-   # Reduces excessive blank lines and trailing whitespace to make content more compact
-   # for LLM consumption without affecting readability.
-   #
-   # Transformations:
-   # - Multiple consecutive blank lines (3+) → 2 blank lines max
-   # - Trailing whitespace on lines → removed
-   # - Leading/trailing whitespace in file → trimmed
+   # Check if content compression should be applied
    #
-   # @param content [String] markdown content to process
-   # @return [String] content with normalized whitespace
-   def normalize_whitespace(content)
-     # Remove trailing whitespace from each line
-     content = content.gsub(/ +$/, '')
-
-     # Reduce multiple consecutive blank lines to maximum of 2
-     content = content.gsub(/\n{4,}/, "\n\n\n")
+   # @return [Boolean]
+   def should_compress?
+     options[:remove_stopwords] || options[:remove_duplicates]
+   end
 
-     # Trim leading and trailing whitespace from the entire content
-     content.strip
+   # Compress content using TextCompressor
+   #
+   # @param content [String] content to compress
+   # @return [String] compressed content
+   def compress_content(content)
+     compressor = TextCompressor.new
+     compression_methods = {
+       remove_stopwords: options[:remove_stopwords],
+       remove_duplicates: options[:remove_duplicates]
+     }
+     compressor.compress(content, compression_methods)
    end
  end
end
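
A usage sketch of the refactored pipeline, with option names taken from the diff; the file name, base URL, and option values are illustrative:

    transformer = LlmDocsBuilder::MarkdownTransformer.new(
      'README.md',
      base_url: 'https://example.com/docs',
      generate_toc: true,
      remove_stopwords: true,  # triggers should_compress? -> TextCompressor
      remove_duplicates: true
    )
    ai_friendly = transformer.transform

Each sub-transformer receives the full options hash, so features that are switched off presumably pass content through unchanged inside the respective transformer.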
@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+
+module LlmDocsBuilder
+  # Formats output for CLI display
+  #
+  # Provides formatting utilities for displaying comparison results,
+  # byte sizes, and numbers in a user-friendly way.
+  #
+  # @api private
+  class OutputFormatter
+    # Format bytes into human-readable string
+    #
+    # @param bytes [Integer] number of bytes
+    # @return [String] formatted string with units (bytes/KB/MB)
+    #
+    # @example
+    #   OutputFormatter.format_bytes(1024)    #=> "1.0 KB"
+    #   OutputFormatter.format_bytes(1048576) #=> "1.0 MB"
+    def self.format_bytes(bytes)
+      if bytes < 1024
+        "#{bytes} bytes"
+      elsif bytes < 1024 * 1024
+        "#{(bytes / 1024.0).round(1)} KB"
+      else
+        "#{(bytes / (1024.0 * 1024)).round(2)} MB"
+      end
+    end
+
+    # Format number with comma separators for readability
+    #
+    # @param number [Integer] number to format
+    # @return [String] formatted number with commas
+    #
+    # @example
+    #   OutputFormatter.format_number(1234567) #=> "1,234,567"
+    def self.format_number(number)
+      number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
+    end
+
+    # Display formatted comparison results
+    #
+    # @param result [Hash] comparison results from Comparator
+    def self.display_comparison_results(result)
+      puts ''
+      puts '=' * 60
+      puts 'Context Window Comparison'
+      puts '=' * 60
+      puts ''
+      puts "Human version: #{format_bytes(result[:human_size])} (~#{format_number(result[:human_tokens])} tokens)"
+      puts "  Source: #{result[:human_source]}"
+      puts ''
+      puts "AI version: #{format_bytes(result[:ai_size])} (~#{format_number(result[:ai_tokens])} tokens)"
+      puts "  Source: #{result[:ai_source]}"
+      puts ''
+      puts '-' * 60
+
+      if result[:reduction_bytes].positive?
+        display_reduction(result)
+      elsif result[:reduction_bytes].negative?
+        display_increase(result)
+      else
+        puts 'Same size'
+      end
+
+      puts '=' * 60
+      puts ''
+    end
+
+    # Display reduction statistics
+    #
+    # @param result [Hash] comparison results
+    # @api private
+    def self.display_reduction(result)
+      puts "Reduction: #{format_bytes(result[:reduction_bytes])} (#{result[:reduction_percent]}%)"
+      puts "Token savings: #{format_number(result[:token_reduction])} tokens (#{result[:token_reduction_percent]}%)"
+      puts "Factor: #{result[:factor]}x smaller"
+    end
+
+    # Display increase statistics
+    #
+    # @param result [Hash] comparison results
+    # @api private
+    def self.display_increase(result)
+      increase_bytes = result[:reduction_bytes].abs
+      increase_percent = result[:reduction_percent].abs
+      token_increase = result[:token_reduction].abs
+      token_increase_percent = result[:token_reduction_percent].abs
+      puts "Increase: #{format_bytes(increase_bytes)} (#{increase_percent}%)"
+      puts "Token increase: #{format_number(token_increase)} tokens (#{token_increase_percent}%)"
+      puts "Factor: #{result[:factor]}x larger"
+    end
+  end
+end
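
A usage sketch for the new class; the hash keys match those read by display_comparison_results, and the values are illustrative:

    result = {
      human_size: 48_230, human_tokens: 12_058, human_source: 'README.md',
      ai_size: 19_660, ai_tokens: 4_915, ai_source: 'README.llm.md',
      reduction_bytes: 28_570, reduction_percent: 59,
      token_reduction: 7_143, token_reduction_percent: 59,
      factor: 2.45
    }
    LlmDocsBuilder::OutputFormatter.display_comparison_results(result)
    # Prints the boxed "Context Window Comparison" report and takes the
    # reduction branch, since reduction_bytes is positive.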