llm-docs-builder 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +3 -0
- data/CHANGELOG.md +37 -0
- data/Gemfile.lock +1 -1
- data/README.md +182 -555
- data/bin/rspecs +2 -1
- data/lib/llm_docs_builder/cli.rb +1 -62
- data/lib/llm_docs_builder/comparator.rb +4 -16
- data/lib/llm_docs_builder/config.rb +42 -5
- data/lib/llm_docs_builder/markdown_transformer.rb +54 -128
- data/lib/llm_docs_builder/output_formatter.rb +93 -0
- data/lib/llm_docs_builder/parser.rb +1 -59
- data/lib/llm_docs_builder/text_compressor.rb +164 -0
- data/lib/llm_docs_builder/token_estimator.rb +52 -0
- data/lib/llm_docs_builder/transformers/base_transformer.rb +30 -0
- data/lib/llm_docs_builder/transformers/content_cleanup_transformer.rb +106 -0
- data/lib/llm_docs_builder/transformers/enhancement_transformer.rb +95 -0
- data/lib/llm_docs_builder/transformers/link_transformer.rb +84 -0
- data/lib/llm_docs_builder/transformers/whitespace_transformer.rb +44 -0
- data/lib/llm_docs_builder/version.rb +1 -1
- metadata +10 -3
- data/CLAUDE.md +0 -178
- data/llm-docs-builder.yml +0 -7
data/bin/rspecs
CHANGED
data/lib/llm_docs_builder/cli.rb
CHANGED
@@ -295,8 +295,6 @@ module LlmDocsBuilder
|
|
295
295
|
puts "Documentation Links: #{parsed.documentation_links.size}"
|
296
296
|
puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
|
297
297
|
puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
|
298
|
-
elsif parsed.respond_to?(:to_xml)
|
299
|
-
puts parsed.to_xml
|
300
298
|
end
|
301
299
|
end
|
302
300
|
|
@@ -335,72 +333,13 @@ module LlmDocsBuilder
|
|
335
333
|
|
336
334
|
begin
|
337
335
|
result = comparator.compare
|
338
|
-
display_comparison_results(result)
|
336
|
+
OutputFormatter.display_comparison_results(result)
|
339
337
|
rescue LlmDocsBuilder::Errors::BaseError => e
|
340
338
|
puts "Error during comparison: #{e.message}"
|
341
339
|
exit 1
|
342
340
|
end
|
343
341
|
end
|
344
342
|
|
345
|
-
# Display formatted comparison results
|
346
|
-
#
|
347
|
-
# @param result [Hash] comparison results from Comparator
|
348
|
-
def display_comparison_results(result)
|
349
|
-
puts ''
|
350
|
-
puts '=' * 60
|
351
|
-
puts 'Context Window Comparison'
|
352
|
-
puts '=' * 60
|
353
|
-
puts ''
|
354
|
-
puts "Human version: #{format_bytes(result[:human_size])} (~#{format_number(result[:human_tokens])} tokens)"
|
355
|
-
puts " Source: #{result[:human_source]}"
|
356
|
-
puts ''
|
357
|
-
puts "AI version: #{format_bytes(result[:ai_size])} (~#{format_number(result[:ai_tokens])} tokens)"
|
358
|
-
puts " Source: #{result[:ai_source]}"
|
359
|
-
puts ''
|
360
|
-
puts '-' * 60
|
361
|
-
|
362
|
-
if result[:reduction_bytes].positive?
|
363
|
-
puts "Reduction: #{format_bytes(result[:reduction_bytes])} (#{result[:reduction_percent]}%)"
|
364
|
-
puts "Token savings: #{format_number(result[:token_reduction])} tokens (#{result[:token_reduction_percent]}%)"
|
365
|
-
puts "Factor: #{result[:factor]}x smaller"
|
366
|
-
elsif result[:reduction_bytes].negative?
|
367
|
-
increase_bytes = result[:reduction_bytes].abs
|
368
|
-
increase_percent = result[:reduction_percent].abs
|
369
|
-
token_increase = result[:token_reduction].abs
|
370
|
-
token_increase_percent = result[:token_reduction_percent].abs
|
371
|
-
puts "Increase: #{format_bytes(increase_bytes)} (#{increase_percent}%)"
|
372
|
-
puts "Token increase: #{format_number(token_increase)} tokens (#{token_increase_percent}%)"
|
373
|
-
puts "Factor: #{result[:factor]}x larger"
|
374
|
-
else
|
375
|
-
puts 'Same size'
|
376
|
-
end
|
377
|
-
|
378
|
-
puts '=' * 60
|
379
|
-
puts ''
|
380
|
-
end
|
381
|
-
|
382
|
-
# Format bytes into human-readable string
|
383
|
-
#
|
384
|
-
# @param bytes [Integer] number of bytes
|
385
|
-
# @return [String] formatted string with units
|
386
|
-
def format_bytes(bytes)
|
387
|
-
if bytes < 1024
|
388
|
-
"#{bytes} bytes"
|
389
|
-
elsif bytes < 1024 * 1024
|
390
|
-
"#{(bytes / 1024.0).round(1)} KB"
|
391
|
-
else
|
392
|
-
"#{(bytes / (1024.0 * 1024)).round(2)} MB"
|
393
|
-
end
|
394
|
-
end
|
395
|
-
|
396
|
-
# Format number with comma separators for readability
|
397
|
-
#
|
398
|
-
# @param number [Integer] number to format
|
399
|
-
# @return [String] formatted number with commas
|
400
|
-
def format_number(number)
|
401
|
-
number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
|
402
|
-
end
|
403
|
-
|
404
343
|
# Validate llms.txt file format
|
405
344
|
#
|
406
345
|
# Checks if llms.txt file follows proper format with title, description, and documentation links.
|
@@ -231,9 +231,10 @@ module LlmDocsBuilder
|
|
231
231
|
Float::INFINITY
|
232
232
|
end
|
233
233
|
|
234
|
-
# Estimate tokens
|
235
|
-
|
236
|
-
|
234
|
+
# Estimate tokens using TokenEstimator
|
235
|
+
estimator = TokenEstimator.new
|
236
|
+
human_tokens = estimator.estimate(human_content)
|
237
|
+
ai_tokens = estimator.estimate(ai_content)
|
237
238
|
token_reduction = human_tokens - ai_tokens
|
238
239
|
token_reduction_percent = if human_tokens.positive?
|
239
240
|
((token_reduction.to_f / human_tokens) * 100).round
|
@@ -256,18 +257,5 @@ module LlmDocsBuilder
|
|
256
257
|
}
|
257
258
|
end
|
258
259
|
|
259
|
-
# Estimate token count using character-based approximation
|
260
|
-
#
|
261
|
-
# Uses the common heuristic that ~4 characters equals 1 token for English text.
|
262
|
-
# This provides reasonable estimates for documentation content without requiring
|
263
|
-
# external tokenizer dependencies.
|
264
|
-
#
|
265
|
-
# @param content [String] text content to estimate tokens for
|
266
|
-
# @return [Integer] estimated number of tokens
|
267
|
-
def estimate_tokens(content)
|
268
|
-
# Use 4 characters per token as a reasonable approximation
|
269
|
-
# This is a common heuristic for English text and works well for documentation
|
270
|
-
(content.length / 4.0).round
|
271
|
-
end
|
272
260
|
end
|
273
261
|
end
|
@@ -70,28 +70,65 @@ module LlmDocsBuilder
|
|
70
70
|
remove_comments: if options.key?(:remove_comments)
|
71
71
|
options[:remove_comments]
|
72
72
|
else
|
73
|
-
self['remove_comments'] ||
|
73
|
+
self['remove_comments'] || true
|
74
74
|
end,
|
75
75
|
normalize_whitespace: if options.key?(:normalize_whitespace)
|
76
76
|
options[:normalize_whitespace]
|
77
77
|
else
|
78
|
-
self['normalize_whitespace'] ||
|
78
|
+
self['normalize_whitespace'] || true
|
79
79
|
end,
|
80
80
|
remove_badges: if options.key?(:remove_badges)
|
81
81
|
options[:remove_badges]
|
82
82
|
else
|
83
|
-
self['remove_badges'] ||
|
83
|
+
self['remove_badges'] || true
|
84
84
|
end,
|
85
85
|
remove_frontmatter: if options.key?(:remove_frontmatter)
|
86
86
|
options[:remove_frontmatter]
|
87
87
|
else
|
88
|
-
self['remove_frontmatter'] ||
|
88
|
+
self['remove_frontmatter'] || true
|
89
89
|
end,
|
90
90
|
verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
|
91
91
|
# Bulk transformation options
|
92
92
|
suffix: options[:suffix] || self['suffix'] || '.llm',
|
93
93
|
excludes: options[:excludes] || self['excludes'] || [],
|
94
|
-
bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false)
|
94
|
+
bulk: options.key?(:bulk) ? options[:bulk] : (self['bulk'] || false),
|
95
|
+
# New compression options
|
96
|
+
remove_code_examples: if options.key?(:remove_code_examples)
|
97
|
+
options[:remove_code_examples]
|
98
|
+
else
|
99
|
+
self['remove_code_examples'] || false
|
100
|
+
end,
|
101
|
+
remove_images: if options.key?(:remove_images)
|
102
|
+
options[:remove_images]
|
103
|
+
else
|
104
|
+
self['remove_images'] || false
|
105
|
+
end,
|
106
|
+
simplify_links: if options.key?(:simplify_links)
|
107
|
+
options[:simplify_links]
|
108
|
+
else
|
109
|
+
self['simplify_links'] || false
|
110
|
+
end,
|
111
|
+
remove_blockquotes: if options.key?(:remove_blockquotes)
|
112
|
+
options[:remove_blockquotes]
|
113
|
+
else
|
114
|
+
self['remove_blockquotes'] || false
|
115
|
+
end,
|
116
|
+
generate_toc: if options.key?(:generate_toc)
|
117
|
+
options[:generate_toc]
|
118
|
+
else
|
119
|
+
self['generate_toc'] || false
|
120
|
+
end,
|
121
|
+
custom_instruction: options[:custom_instruction] || self['custom_instruction'],
|
122
|
+
remove_stopwords: if options.key?(:remove_stopwords)
|
123
|
+
options[:remove_stopwords]
|
124
|
+
else
|
125
|
+
self['remove_stopwords'] || false
|
126
|
+
end,
|
127
|
+
remove_duplicates: if options.key?(:remove_duplicates)
|
128
|
+
options[:remove_duplicates]
|
129
|
+
else
|
130
|
+
self['remove_duplicates'] || false
|
131
|
+
end
|
95
132
|
}
|
96
133
|
end
|
97
134
|
|
@@ -3,9 +3,8 @@
|
|
3
3
|
module LlmDocsBuilder
|
4
4
|
# Transforms markdown files to be AI-friendly
|
5
5
|
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# formats.
|
6
|
+
# Orchestrates a pipeline of specialized transformers to process markdown content.
|
7
|
+
# Each transformer is responsible for a specific aspect of the transformation.
|
9
8
|
#
|
10
9
|
# @example Transform with base URL
|
11
10
|
# transformer = LlmDocsBuilder::MarkdownTransformer.new('README.md',
|
@@ -31,163 +30,90 @@ module LlmDocsBuilder
|
|
31
30
|
# @option options [Boolean] :normalize_whitespace normalize excessive whitespace
|
32
31
|
# @option options [Boolean] :remove_badges remove badge/shield images
|
33
32
|
# @option options [Boolean] :remove_frontmatter remove YAML/TOML frontmatter
|
33
|
+
# @option options [Boolean] :remove_code_examples remove code blocks and inline code
|
34
|
+
# @option options [Boolean] :remove_images remove image syntax
|
35
|
+
# @option options [Boolean] :simplify_links shorten verbose link text
|
36
|
+
# @option options [Boolean] :remove_blockquotes remove blockquote formatting
|
37
|
+
# @option options [Boolean] :generate_toc generate table of contents at the top
|
38
|
+
# @option options [String] :custom_instruction custom instruction text to inject at top
|
39
|
+
# @option options [Boolean] :remove_stopwords remove common stopwords (aggressive)
|
40
|
+
# @option options [Boolean] :remove_duplicates remove duplicate paragraphs
|
34
41
|
def initialize(file_path, options = {})
|
35
42
|
@file_path = file_path
|
36
43
|
@options = options
|
37
44
|
end
|
38
45
|
|
39
|
-
# Transform markdown content
|
46
|
+
# Transform markdown content using a pipeline of transformers
|
40
47
|
#
|
41
|
-
#
|
42
|
-
# - Removes
|
43
|
-
# -
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# - Normalizes excessive whitespace (if normalize_whitespace enabled)
|
48
|
+
# Processes content through specialized transformers in order:
|
49
|
+
# 1. ContentCleanupTransformer - Removes unwanted elements
|
50
|
+
# 2. LinkTransformer - Processes links
|
51
|
+
# 3. TextCompressor - Advanced compression (if enabled)
|
52
|
+
# 4. EnhancementTransformer - Adds TOC and instructions
|
53
|
+
# 5. WhitespaceTransformer - Normalizes whitespace
|
48
54
|
#
|
49
55
|
# @return [String] transformed markdown content
|
50
56
|
def transform
|
51
57
|
content = File.read(file_path)
|
52
58
|
|
53
|
-
#
|
54
|
-
content =
|
55
|
-
|
56
|
-
|
57
|
-
content =
|
58
|
-
content =
|
59
|
-
|
60
|
-
# Content cleanup
|
61
|
-
content = remove_comments(content) if options[:remove_comments]
|
62
|
-
content = remove_badges(content) if options[:remove_badges]
|
63
|
-
|
64
|
-
# Whitespace normalization last (after all other transformations)
|
65
|
-
content = normalize_whitespace(content) if options[:normalize_whitespace]
|
59
|
+
# Build and execute transformation pipeline
|
60
|
+
content = cleanup_transformer.transform(content, options)
|
61
|
+
content = link_transformer.transform(content, options)
|
62
|
+
content = compress_content(content) if should_compress?
|
63
|
+
content = enhancement_transformer.transform(content, options)
|
64
|
+
content = whitespace_transformer.transform(content, options)
|
66
65
|
|
67
66
|
content
|
68
67
|
end
|
69
68
|
|
70
69
|
private
|
71
70
|
|
72
|
-
#
|
71
|
+
# Get content cleanup transformer instance
|
73
72
|
#
|
74
|
-
#
|
75
|
-
|
76
|
-
|
77
|
-
# @param content [String] markdown content to process
|
78
|
-
# @return [String] content with expanded links
|
79
|
-
def expand_relative_links(content)
|
80
|
-
base_url = options[:base_url]
|
81
|
-
|
82
|
-
content.gsub(/\[([^\]]+)\]\(([^)]+)\)/) do |match|
|
83
|
-
text = ::Regexp.last_match(1)
|
84
|
-
url = ::Regexp.last_match(2)
|
85
|
-
|
86
|
-
if url.start_with?('http://', 'https://', '//', '#')
|
87
|
-
match # Already absolute or anchor
|
88
|
-
else
|
89
|
-
# Clean up relative path
|
90
|
-
clean_url = url.gsub(%r{^\./}, '') # Remove leading './'
|
91
|
-
expanded_url = File.join(base_url, clean_url)
|
92
|
-
"[#{text}](#{expanded_url})"
|
93
|
-
end
|
94
|
-
end
|
73
|
+
# @return [Transformers::ContentCleanupTransformer]
|
74
|
+
def cleanup_transformer
|
75
|
+
@cleanup_transformer ||= Transformers::ContentCleanupTransformer.new
|
95
76
|
end
|
96
77
|
|
97
|
-
#
|
78
|
+
# Get link transformer instance
|
98
79
|
#
|
99
|
-
#
|
100
|
-
|
101
|
-
|
102
|
-
# @return [String] content with converted URLs
|
103
|
-
def convert_html_urls(content)
|
104
|
-
content.gsub(%r{https?://[^\s<>]+\.html?(?=[)\s]|$)}) do |url|
|
105
|
-
url.sub(/\.html?$/, '.md')
|
106
|
-
end
|
80
|
+
# @return [Transformers::LinkTransformer]
|
81
|
+
def link_transformer
|
82
|
+
@link_transformer ||= Transformers::LinkTransformer.new
|
107
83
|
end
|
108
84
|
|
109
|
-
#
|
110
|
-
#
|
111
|
-
# Strips out HTML comments (<!-- ... -->) which are typically metadata for developers
|
112
|
-
# and not relevant for LLM consumption. This reduces token usage and improves clarity.
|
113
|
-
#
|
114
|
-
# Handles:
|
115
|
-
# - Single-line comments: <!-- comment -->
|
116
|
-
# - Multi-line comments spanning multiple lines
|
117
|
-
# - Multiple comments in the same content
|
85
|
+
# Get enhancement transformer instance
|
118
86
|
#
|
119
|
-
# @
|
120
|
-
|
121
|
-
|
122
|
-
# Remove HTML comments (single and multi-line)
|
123
|
-
# The .*? makes it non-greedy so it stops at the first -->
|
124
|
-
content.gsub(/<!--.*?-->/m, '')
|
87
|
+
# @return [Transformers::EnhancementTransformer]
|
88
|
+
def enhancement_transformer
|
89
|
+
@enhancement_transformer ||= Transformers::EnhancementTransformer.new
|
125
90
|
end
|
126
91
|
|
127
|
-
#
|
92
|
+
# Get whitespace transformer instance
|
128
93
|
#
|
129
|
-
#
|
130
|
-
|
131
|
-
|
132
|
-
# Recognizes common patterns:
|
133
|
-
# - [](link) (linked badges)
|
134
|
-
# -  (unlinked badges)
|
135
|
-
# - Common badge domains: shields.io, badge.fury.io, travis-ci.org, etc.
|
136
|
-
#
|
137
|
-
# @param content [String] markdown content to process
|
138
|
-
# @return [String] content with badges removed
|
139
|
-
def remove_badges(content)
|
140
|
-
# Remove linked badges: [](link-url)
|
141
|
-
content = content.gsub(/\[\!\[([^\]]*)\]\([^\)]*(?:badge|shield|svg|travis|coveralls|fury)[^\)]*\)\]\([^\)]*\)/i, '')
|
142
|
-
|
143
|
-
# Remove standalone badges: 
|
144
|
-
content = content.gsub(/!\[([^\]]*)\]\([^\)]*(?:badge|shield|svg|travis|coveralls|fury)[^\)]*\)/i, '')
|
145
|
-
|
146
|
-
content
|
94
|
+
# @return [Transformers::WhitespaceTransformer]
|
95
|
+
def whitespace_transformer
|
96
|
+
@whitespace_transformer ||= Transformers::WhitespaceTransformer.new
|
147
97
|
end
|
148
98
|
|
149
|
-
#
|
150
|
-
#
|
151
|
-
# Strips out frontmatter blocks which are metadata used by static site generators
|
152
|
-
# (Jekyll, Hugo, etc.) but not relevant for LLM consumption.
|
99
|
+
# Check if content compression should be applied
|
153
100
|
#
|
154
|
-
#
|
155
|
-
|
156
|
-
|
157
|
-
#
|
158
|
-
# @param content [String] markdown content to process
|
159
|
-
# @return [String] content with frontmatter removed
|
160
|
-
def remove_frontmatter(content)
|
161
|
-
# Remove YAML frontmatter (--- ... ---)
|
162
|
-
content = content.sub(/\A---\s*$.*?^---\s*$/m, '')
|
163
|
-
|
164
|
-
# Remove TOML frontmatter (+++ ... +++)
|
165
|
-
content = content.sub(/\A\+\+\+\s*$.*?^\+\+\+\s*$/m, '')
|
166
|
-
|
167
|
-
content
|
101
|
+
# @return [Boolean]
|
102
|
+
def should_compress?
|
103
|
+
options[:remove_stopwords] || options[:remove_duplicates]
|
168
104
|
end
|
169
105
|
|
170
|
-
#
|
171
|
-
#
|
172
|
-
#
|
173
|
-
#
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
# @return [String] content with normalized whitespace
|
182
|
-
def normalize_whitespace(content)
|
183
|
-
# Remove trailing whitespace from each line
|
184
|
-
content = content.gsub(/ +$/, '')
|
185
|
-
|
186
|
-
# Reduce multiple consecutive blank lines to maximum of 2
|
187
|
-
content = content.gsub(/\n{4,}/, "\n\n\n")
|
188
|
-
|
189
|
-
# Trim leading and trailing whitespace from the entire content
|
190
|
-
content.strip
|
106
|
+
# Compress content using TextCompressor
|
107
|
+
#
|
108
|
+
# @param content [String] content to compress
|
109
|
+
# @return [String] compressed content
|
110
|
+
def compress_content(content)
|
111
|
+
compressor = TextCompressor.new
|
112
|
+
compression_methods = {
|
113
|
+
remove_stopwords: options[:remove_stopwords],
|
114
|
+
remove_duplicates: options[:remove_duplicates]
|
115
|
+
}
|
116
|
+
compressor.compress(content, compression_methods)
|
191
117
|
end
|
192
118
|
end
|
193
119
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LlmDocsBuilder
|
4
|
+
# Formats output for CLI display
|
5
|
+
#
|
6
|
+
# Provides formatting utilities for displaying comparison results,
|
7
|
+
# byte sizes, and numbers in a user-friendly way.
|
8
|
+
#
|
9
|
+
# @api private
|
10
|
+
class OutputFormatter
|
11
|
+
# Format bytes into human-readable string
|
12
|
+
#
|
13
|
+
# @param bytes [Integer] number of bytes
|
14
|
+
# @return [String] formatted string with units (bytes/KB/MB)
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# OutputFormatter.format_bytes(1024) #=> "1.0 KB"
|
18
|
+
# OutputFormatter.format_bytes(1048576) #=> "1.0 MB"
|
19
|
+
def self.format_bytes(bytes)
|
20
|
+
if bytes < 1024
|
21
|
+
"#{bytes} bytes"
|
22
|
+
elsif bytes < 1024 * 1024
|
23
|
+
"#{(bytes / 1024.0).round(1)} KB"
|
24
|
+
else
|
25
|
+
"#{(bytes / (1024.0 * 1024)).round(2)} MB"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# Format number with comma separators for readability
|
30
|
+
#
|
31
|
+
# @param number [Integer] number to format
|
32
|
+
# @return [String] formatted number with commas
|
33
|
+
#
|
34
|
+
# @example
|
35
|
+
# OutputFormatter.format_number(1234567) #=> "1,234,567"
|
36
|
+
def self.format_number(number)
|
37
|
+
number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
|
38
|
+
end
|
39
|
+
|
40
|
+
# Display formatted comparison results
|
41
|
+
#
|
42
|
+
# @param result [Hash] comparison results from Comparator
|
43
|
+
def self.display_comparison_results(result)
|
44
|
+
puts ''
|
45
|
+
puts '=' * 60
|
46
|
+
puts 'Context Window Comparison'
|
47
|
+
puts '=' * 60
|
48
|
+
puts ''
|
49
|
+
puts "Human version: #{format_bytes(result[:human_size])} (~#{format_number(result[:human_tokens])} tokens)"
|
50
|
+
puts " Source: #{result[:human_source]}"
|
51
|
+
puts ''
|
52
|
+
puts "AI version: #{format_bytes(result[:ai_size])} (~#{format_number(result[:ai_tokens])} tokens)"
|
53
|
+
puts " Source: #{result[:ai_source]}"
|
54
|
+
puts ''
|
55
|
+
puts '-' * 60
|
56
|
+
|
57
|
+
if result[:reduction_bytes].positive?
|
58
|
+
display_reduction(result)
|
59
|
+
elsif result[:reduction_bytes].negative?
|
60
|
+
display_increase(result)
|
61
|
+
else
|
62
|
+
puts 'Same size'
|
63
|
+
end
|
64
|
+
|
65
|
+
puts '=' * 60
|
66
|
+
puts ''
|
67
|
+
end
|
68
|
+
|
69
|
+
# Display reduction statistics
|
70
|
+
#
|
71
|
+
# @param result [Hash] comparison results
|
72
|
+
# @api private
|
73
|
+
def self.display_reduction(result)
|
74
|
+
puts "Reduction: #{format_bytes(result[:reduction_bytes])} (#{result[:reduction_percent]}%)"
|
75
|
+
puts "Token savings: #{format_number(result[:token_reduction])} tokens (#{result[:token_reduction_percent]}%)"
|
76
|
+
puts "Factor: #{result[:factor]}x smaller"
|
77
|
+
end
|
78
|
+
|
79
|
+
# Display increase statistics
|
80
|
+
#
|
81
|
+
# @param result [Hash] comparison results
|
82
|
+
# @api private
|
83
|
+
def self.display_increase(result)
|
84
|
+
increase_bytes = result[:reduction_bytes].abs
|
85
|
+
increase_percent = result[:reduction_percent].abs
|
86
|
+
token_increase = result[:token_reduction].abs
|
87
|
+
token_increase_percent = result[:token_reduction_percent].abs
|
88
|
+
puts "Increase: #{format_bytes(increase_bytes)} (#{increase_percent}%)"
|
89
|
+
puts "Token increase: #{format_number(token_increase)} tokens (#{token_increase_percent}%)"
|
90
|
+
puts "Factor: #{result[:factor]}x larger"
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -108,14 +108,12 @@ module LlmDocsBuilder
|
|
108
108
|
# Represents parsed llms.txt content with structured access to sections
|
109
109
|
#
|
110
110
|
# Provides convenient access to parsed llms.txt sections including title,
|
111
|
-
# description, and link collections.
|
111
|
+
# description, and link collections.
|
112
112
|
#
|
113
113
|
# @example Access parsed content
|
114
114
|
# parsed.title # => "My Project"
|
115
115
|
# parsed.description # => "A description"
|
116
116
|
# parsed.documentation_links # => [{title: "...", url: "...", description: "..."}]
|
117
|
-
# parsed.to_h # => Hash representation
|
118
|
-
# parsed.to_xml # => XML string
|
119
117
|
#
|
120
118
|
# @api public
|
121
119
|
class ParsedContent
|
@@ -163,61 +161,5 @@ module LlmDocsBuilder
|
|
163
161
|
def optional_links
|
164
162
|
sections[:optional] || []
|
165
163
|
end
|
166
|
-
|
167
|
-
# Convert to hash representation
|
168
|
-
#
|
169
|
-
# @return [Hash] hash containing all parsed sections
|
170
|
-
def to_h
|
171
|
-
sections
|
172
|
-
end
|
173
|
-
|
174
|
-
# Convert to XML representation
|
175
|
-
#
|
176
|
-
# Generates an XML document with all parsed sections and links.
|
177
|
-
#
|
178
|
-
# @return [String] XML string representation
|
179
|
-
def to_xml
|
180
|
-
builder = []
|
181
|
-
builder << '<?xml version="1.0" encoding="UTF-8"?>'
|
182
|
-
builder << '<llms_context>'
|
183
|
-
builder << " <title>#{title}</title>" if title
|
184
|
-
builder << " <description>#{description}</description>" if description
|
185
|
-
|
186
|
-
add_xml_section(builder, 'documentation', documentation_links)
|
187
|
-
add_xml_section(builder, 'examples', example_links)
|
188
|
-
add_xml_section(builder, 'optional', optional_links) if sections[:optional]
|
189
|
-
|
190
|
-
builder << '</llms_context>'
|
191
|
-
builder.join("\n")
|
192
|
-
end
|
193
|
-
|
194
|
-
private
|
195
|
-
|
196
|
-
# Appends section XML elements to builder array
|
197
|
-
#
|
198
|
-
# Handles both array of link hashes and raw string content
|
199
|
-
#
|
200
|
-
# @param builder [Array<String>] XML lines accumulator
|
201
|
-
# @param name [String] section name
|
202
|
-
# @param links [Array<Hash>, String] section links or content
|
203
|
-
def add_xml_section(builder, name, links)
|
204
|
-
return if links.empty?
|
205
|
-
|
206
|
-
builder << " <#{name}>"
|
207
|
-
|
208
|
-
if links.is_a?(Array)
|
209
|
-
links.each do |link|
|
210
|
-
builder << ' <link>'
|
211
|
-
builder << " <title>#{link[:title]}</title>"
|
212
|
-
builder << " <url>#{link[:url]}</url>"
|
213
|
-
builder << " <description>#{link[:description]}</description>"
|
214
|
-
builder << ' </link>'
|
215
|
-
end
|
216
|
-
else
|
217
|
-
builder << " #{links}"
|
218
|
-
end
|
219
|
-
|
220
|
-
builder << " </#{name}>"
|
221
|
-
end
|
222
164
|
end
|
223
165
|
end
|