llm-docs-builder 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fileutils'
4
+
5
+ module LlmDocsBuilder
6
# Bulk transforms multiple markdown files to be AI-friendly
#
# Walks a documentation directory recursively and writes an LLM-friendly copy of
# every markdown file next to its original (README.md -> README.llm.md by default).
# Individual files can be skipped with glob-style exclusion patterns.
#
# NOTE(review): generated files also end in ".md" and live beside their sources, so a
# later run over the same directory picks them up as inputs unless they are excluded.
#
# @example Transform all files in a directory
#   transformer = LlmDocsBuilder::BulkTransformer.new('./docs',
#     base_url: 'https://myproject.io',
#     suffix: '.llm'
#   )
#   transformer.transform_all
#
# @api public
class BulkTransformer
  # Markdown extension in any letter case, anchored to the very end of the path
  MARKDOWN_EXTENSION = /\.md\z/i
  private_constant :MARKDOWN_EXTENSION

  # @return [String] path to documentation directory
  attr_reader :docs_path

  # @return [Hash] transformation options
  attr_reader :options

  # Initialize a new bulk transformer
  #
  # Caller-supplied options are merged over the defaults, so any key given
  # explicitly (even with a nil value) replaces the default.
  #
  # @param docs_path [String] path to documentation directory
  # @param options [Hash] transformation options
  # @option options [String] :base_url base URL for expanding relative links
  # @option options [Boolean] :convert_urls convert HTML URLs to markdown format
  # @option options [String] :suffix suffix for transformed files (default: '.llm')
  # @option options [Array<String>] :excludes glob patterns for files to exclude
  # @option options [Boolean] :verbose enable verbose output
  def initialize(docs_path, options = {})
    @docs_path = docs_path
    @options = { suffix: '.llm', excludes: [] }.merge(options)
  end

  # Transform all markdown files in the directory
  #
  # Finds every markdown file below docs_path, drops the ones matching an
  # exclusion pattern, transforms the rest and writes each result to the path
  # returned by generate_output_path. Progress is printed when :verbose is set.
  #
  # @return [Array<String>] paths of the files that were written
  # @raise [Errors::GenerationError] when docs_path is not a directory
  def transform_all
    raise Errors::GenerationError, "Directory not found: #{docs_path}" unless File.directory?(docs_path)

    candidates = find_markdown_files.reject { |file_path| should_exclude?(file_path) }

    candidates.map do |file_path|
      puts "Transforming #{file_path}..." if options[:verbose]

      transformed_content = transform_file(file_path)
      output_path = generate_output_path(file_path)

      # The target directory normally exists already (output sits beside the input);
      # mkdir_p is a cheap safeguard.
      FileUtils.mkdir_p(File.dirname(output_path))
      File.write(output_path, transformed_content)

      puts " → #{output_path}" if options[:verbose]

      output_path
    end
  end

  private

  # Recursively scans the docs directory for markdown files
  #
  # Keeps regular files whose name ends in ".md" (any letter case) and whose
  # basename does not start with a dot.
  #
  # NOTE(review): Find is used here while only 'fileutils' is required at the top
  # of this file; presumably 'find' is loaded by the gem's entry point, confirm.
  #
  # @return [Array<String>] sorted paths to markdown files
  def find_markdown_files
    markdown_paths = Find.find(docs_path).select do |path|
      File.file?(path) &&
        path.match?(MARKDOWN_EXTENSION) &&
        !File.basename(path).start_with?('.')
    end

    markdown_paths.sort
  end

  # Tests if file matches any exclusion pattern from options
  #
  # Each pattern is matched against the path exactly as produced by
  # find_markdown_files (that is, including the docs_path prefix), using
  # File.fnmatch with the FNM_PATHNAME and FNM_DOTMATCH flags.
  #
  # @param file_path [String] path to check
  # @return [Boolean] true if file should be excluded
  def should_exclude?(file_path)
    match_flags = File::FNM_PATHNAME | File::FNM_DOTMATCH

    Array(options[:excludes]).any? do |pattern|
      File.fnmatch(pattern, file_path, match_flags)
    end
  end

  # Applies markdown transformations to a single file
  #
  # Delegates to MarkdownTransformer, passing the full options hash through.
  #
  # @param file_path [String] path to markdown file
  # @return [String] transformed content
  def transform_file(file_path)
    MarkdownTransformer.new(file_path, options).transform
  end

  # Constructs output path by inserting the suffix before the .md extension
  #
  # For example: README.md with suffix .llm becomes README.llm.md. The
  # extension is stripped case-insensitively so that README.MD (which
  # find_markdown_files also accepts) becomes README.llm.md rather than
  # README.MD.llm.md.
  #
  # @param input_path [String] original file path
  # @return [String] path for transformed file, in the same directory as the input
  def generate_output_path(input_path)
    stem = File.basename(input_path).sub(MARKDOWN_EXTENSION, '')

    File.join(File.dirname(input_path), "#{stem}#{options[:suffix]}.md")
  end
end
135
+ end
@@ -0,0 +1,434 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'optparse'
4
+
5
+ module LlmDocsBuilder
6
+ # Command-line interface for the llm-docs-builder gem
7
+ #
8
+ # Provides commands for generating, transforming, parsing, and validating llms.txt files.
9
+ # All file paths must be specified using flags (-d/--docs) for consistency.
10
+ #
11
+ # @example Run the CLI
12
+ # LlmDocsBuilder::CLI.run(['generate', '--docs', './docs', '--output', 'llms.txt'])
13
+ #
14
+ # @api public
15
+ class CLI
16
# Convenience entry point: builds a CLI instance and runs it
#
# @param argv [Array<String>] command-line arguments (defaults to ARGV)
# @return [Object] whatever the instance-level run returns
def self.run(argv = ARGV)
  cli = new
  cli.run(argv)
end
22
+
23
# Execute a CLI command with error handling
#
# Parses argv with parse_options and dispatches on the resulting :command.
# When no command was parsed, generate is run. An unrecognised command prints
# a hint and exits. Library errors and any other StandardError are reported
# on standard output and turned into exit status 1; the backtrace is printed
# only when options were parsed and :verbose is set.
#
# @param argv [Array<String>] command-line arguments
# @raise [SystemExit] exits with status 1 on error
def run(argv)
  options = parse_options(argv)
  command = options[:command]

  # Commands whose handler takes the parsed options hash
  handlers = {
    nil => :generate,
    'generate' => :generate,
    'transform' => :transform,
    'bulk-transform' => :bulk_transform,
    'compare' => :compare,
    'parse' => :parse,
    'validate' => :validate
  }

  if command == 'version'
    show_version
  elsif handlers.key?(command)
    send(handlers[command], options)
  else
    puts "Unknown command: #{command}"
    puts "Run 'llm-docs-builder --help' for usage information"
    exit 1
  end
rescue LlmDocsBuilder::Errors::BaseError => e
  puts "Error: #{e.message}"
  exit 1
rescue StandardError => e
  puts "Unexpected error: #{e.message}"
  # options is nil here if parse_options itself raised
  puts e.backtrace.join("\n") if options&.fetch(:verbose, false)
  exit 1
end
61
+
62
+ private
63
+
64
# Parse command-line options using OptionParser
#
# The first argument is taken as the command name only when it looks like one:
# lowercase letters and hyphens, starting with a letter. Requiring a leading
# letter keeps flags such as -d, -v, --help or --version from being mistaken
# for a command, so they reach OptionParser and the command stays nil (which
# the dispatcher treats as the default command).
#
# argv is modified in place: the command (if any) and all recognised flags
# are removed from it.
#
# @param argv [Array<String>] command-line arguments
# @return [Hash] parsed options; always has :command, plus whichever of
#   :config, :docs, :output, :url, :file, :verbose were given
# @raise [OptionParser::ParseError] on unknown flags or missing flag arguments
def parse_options(argv)
  command = argv.first&.match?(/\A[a-z][a-z-]*\z/) ? argv.shift : nil
  options = { command: command }

  parser = OptionParser.new do |opts|
    opts.banner = "llm-docs-builder - Build and optimize documentation for LLMs\n\nUsage: llm-docs-builder [command] [options]\n\nFor advanced configuration (base_url, title, description, convert_urls), use a config file."

    opts.separator ''
    opts.separator 'Commands:'
    opts.separator ' generate Generate llms.txt from documentation (default)'
    opts.separator ' transform Transform a markdown file to be AI-friendly'
    opts.separator ' bulk-transform Transform all markdown files in directory'
    opts.separator ' compare Compare content sizes to measure context savings'
    opts.separator ' parse Parse existing llms.txt file'
    opts.separator ' validate Validate llms.txt file'
    opts.separator ' version Show version'

    opts.separator ''
    opts.separator 'Options:'

    opts.on('-c', '--config PATH', 'Configuration file path (default: llm-docs-builder.yml)') do |path|
      options[:config] = path
    end

    opts.on('-d', '--docs PATH', 'Path to documentation directory or file') do |path|
      options[:docs] = path
    end

    opts.on('-o', '--output PATH', 'Output file path') do |path|
      options[:output] = path
    end

    opts.on('-u', '--url URL', 'URL to fetch for comparison') do |url|
      options[:url] = url
    end

    opts.on('-f', '--file PATH', 'Local markdown file for comparison') do |path|
      options[:file] = path
    end

    opts.on('-v', '--verbose', 'Verbose output') do
      options[:verbose] = true
    end

    # Informational flags print and terminate immediately
    opts.on('-h', '--help', 'Show this message') do
      puts opts
      exit
    end

    opts.on('--version', 'Show version') do
      show_version
      exit
    end
  end

  parser.parse!(argv)

  options
end
128
+
129
# Generate llms.txt from a documentation directory or file
#
# Merges the config file with the CLI options, checks that the docs path
# exists, generates the content and writes it to the merged :output path.
# In verbose mode the freshly generated content is also run through the
# validator and the outcome is printed.
#
# @param options [Hash] command options from parse_options
# @option options [String] :config path to config file
# @option options [String] :docs path to documentation
# @option options [String] :output output file path
# @option options [Boolean] :verbose enable verbose output
# @raise [SystemExit] exits with status 1 if docs path not found
def generate(options)
  merged_options = LlmDocsBuilder::Config.new(options[:config]).merge_with_options(options)
  docs_path = merged_options[:docs]

  unless File.exist?(docs_path)
    puts "Documentation path not found: #{docs_path}"
    exit 1
  end

  puts "Generating llms.txt from #{docs_path}..." if merged_options[:verbose]

  content = LlmDocsBuilder.generate_from_docs(docs_path, merged_options)
  target = merged_options[:output]
  File.write(target, content)
  puts "Successfully generated #{target}"

  # Validation feedback is a verbose-only extra
  return unless merged_options[:verbose]

  checker = LlmDocsBuilder::Validator.new(content)
  if checker.valid?
    puts 'Valid llms.txt format'
  else
    puts 'Validation warnings:'
    checker.errors.each { |warning| puts " - #{warning}" }
  end
end
170
+
171
# Transform a single markdown file to be AI-friendly
#
# Merges the config file with the CLI options, validates the input path and
# delegates to LlmDocsBuilder.transform_markdown. The result is written to
# :output when one is set and differs from 'llms.txt'; otherwise it is printed.
#
# NOTE(review): 'llms.txt' is presumably the default :output supplied by Config,
# which is why it is treated as "no output requested" here; confirm.
#
# @param options [Hash] command options from parse_options
# @option options [String] :config path to config file
# @option options [String] :docs path to markdown file (required)
# @option options [String] :output output file path
# @option options [String] :base_url base URL for link expansion
# @option options [Boolean] :convert_urls convert .html to .md
# @option options [Boolean] :verbose enable verbose output
# @raise [SystemExit] exits with status 1 if file not found or -d flag missing
def transform(options)
  merged_options = LlmDocsBuilder::Config.new(options[:config]).merge_with_options(options)
  source_path = merged_options[:docs]

  unless source_path
    puts 'File path required for transform command (use -d/--docs)'
    exit 1
  end

  unless File.exist?(source_path)
    puts "File not found: #{source_path}"
    exit 1
  end

  puts "Transforming #{source_path}..." if merged_options[:verbose]

  content = LlmDocsBuilder.transform_markdown(source_path, merged_options)

  if !merged_options[:output] || merged_options[:output] == 'llms.txt'
    puts content
  else
    File.write(merged_options[:output], content)
    puts "Transformed content saved to #{merged_options[:output]}"
  end
end
211
+
212
# Transform all markdown files in a directory recursively
#
# Merges the config file with the CLI options, checks that the docs path is an
# existing directory, then delegates to LlmDocsBuilder.bulk_transform and
# reports the written files.
#
# The :excludes value is coerced with Array() before it is inspected, so a
# missing (nil) value no longer raises in verbose mode; this matches how
# BulkTransformer#should_exclude? reads the same option.
#
# @param options [Hash] command options from parse_options
# @option options [String] :config path to config file
# @option options [String] :docs path to documentation directory (required)
# @option options [String] :suffix suffix for transformed files (default: '.llm')
# @option options [Array<String>] :excludes glob patterns to exclude
# @option options [String] :base_url base URL for link expansion
# @option options [Boolean] :convert_urls convert .html to .md
# @option options [Boolean] :verbose enable verbose output
# @raise [SystemExit] exits with status 1 if directory not found or transformation fails
def bulk_transform(options)
  merged_options = LlmDocsBuilder::Config.new(options[:config]).merge_with_options(options)
  docs_path = merged_options[:docs]

  unless File.exist?(docs_path)
    puts "Documentation path not found: #{docs_path}"
    exit 1
  end

  unless File.directory?(docs_path)
    puts "Path must be a directory for bulk transformation: #{docs_path}"
    exit 1
  end

  if merged_options[:verbose]
    excludes = Array(merged_options[:excludes])

    puts "Bulk transforming markdown files in #{docs_path}..."
    puts "Using suffix: #{merged_options[:suffix]}"
    puts "Excludes: #{excludes.join(', ')}" unless excludes.empty?
  end

  begin
    transformed_files = LlmDocsBuilder.bulk_transform(docs_path, merged_options)

    if transformed_files.empty?
      puts 'No markdown files found to transform'
    else
      puts "Successfully transformed #{transformed_files.size} files:"
      # verbose mode already printed each file while transforming
      transformed_files.each { |file| puts " #{file}" } unless merged_options[:verbose]
    end
  rescue LlmDocsBuilder::Errors::BaseError => e
    puts "Error during bulk transformation: #{e.message}"
    exit 1
  end
end
267
+
268
# Parse an existing llms.txt file and display information about it
#
# Resolves the file path from the merged config/CLI options (falling back to
# 'llms.txt'), parses it via LlmDocsBuilder.parse and prints either a summary
# (verbose) or the XML form when the parsed object offers to_xml.
#
# NOTE(review): verbosity is read from the raw CLI options here, whereas the
# other commands read it from the merged options; confirm this is intended.
#
# @param options [Hash] command options from parse_options
# @option options [String] :config path to config file
# @option options [String] :docs path to llms.txt file (defaults to 'llms.txt')
# @option options [Boolean] :verbose enable verbose output with link counts
# @raise [SystemExit] exits with status 1 if file not found
def parse(options)
  merged_options = LlmDocsBuilder::Config.new(options[:config]).merge_with_options(options)
  file_path = merged_options[:docs] || 'llms.txt'

  unless File.exist?(file_path)
    puts "File not found: #{file_path}"
    exit 1
  end

  document = LlmDocsBuilder.parse(file_path)

  unless options[:verbose]
    puts document.to_xml if document.respond_to?(:to_xml)
    return
  end

  puts "Title: #{document.title}"
  puts "Description: #{document.description}"
  puts "Documentation Links: #{document.documentation_links.size}"
  # The example/optional link sections are reported only if the parsed object exposes them
  puts "Example Links: #{document.example_links.size}" if document.respond_to?(:example_links)
  puts "Optional Links: #{document.optional_links.size}" if document.respond_to?(:optional_links)
end
302
+
303
# Compare content sizes between human and AI versions
#
# Requires a URL. Builds a Comparator with the optional local file and the
# verbose flag, runs the comparison and prints the formatted result. Without
# a URL, usage examples are printed and the process exits.
#
# @param options [Hash] command options from parse_options
# @option options [String] :url URL to fetch for comparison (required)
# @option options [String] :file local markdown file for comparison (optional)
# @option options [Boolean] :verbose enable verbose output
# @raise [SystemExit] exits with status 1 if URL not provided or comparison fails
def compare(options)
  url = options[:url]

  unless url
    usage_lines = [
      'URL required for compare command (use -u/--url)',
      '',
      'Examples:',
      ' # Compare remote versions (different User-Agents)',
      ' llm-docs-builder compare --url https://example.com/docs/page.html',
      '',
      ' # Compare remote with local file',
      ' llm-docs-builder compare --url https://example.com/docs/page.html --file docs/page.md'
    ]
    usage_lines.each { |line| puts line }
    exit 1
  end

  comparator = LlmDocsBuilder::Comparator.new(
    url,
    { local_file: options[:file], verbose: options[:verbose] }
  )

  # Only the comparison itself is guarded; constructing the comparator is not
  begin
    display_comparison_results(comparator.compare)
  rescue LlmDocsBuilder::Errors::BaseError => e
    puts "Error during comparison: #{e.message}"
    exit 1
  end
end
344
+
345
# Print a formatted comparison report
#
# Reads :human_size, :human_source, :ai_size, :ai_source, :reduction_bytes,
# :reduction_percent and :factor from the result hash. A positive
# :reduction_bytes is reported as a reduction, a negative one as an increase
# (using absolute values), and zero as "Same size".
#
# @param result [Hash] comparison results from Comparator
# @return [nil]
def display_comparison_results(result)
  heavy_rule = '=' * 60
  light_rule = '-' * 60

  puts ''
  puts heavy_rule
  puts 'Context Window Comparison'
  puts heavy_rule
  puts ''
  puts "Human version: #{format_bytes(result[:human_size])}"
  puts " Source: #{result[:human_source]}"
  puts ''
  puts "AI version: #{format_bytes(result[:ai_size])}"
  puts " Source: #{result[:ai_source]}"
  puts ''
  puts light_rule

  delta = result[:reduction_bytes]

  if delta.positive?
    puts "Reduction: #{format_bytes(delta)} (#{result[:reduction_percent]}%)"
    puts "Factor: #{result[:factor]}x smaller"
  elsif delta.negative?
    puts "Increase: #{format_bytes(delta.abs)} (#{result[:reduction_percent].abs}%)"
    puts "Factor: #{result[:factor]}x larger"
  else
    puts 'Same size'
  end

  puts heavy_rule
  puts ''
end
377
+
378
# Format a byte count as a human-readable string
#
# Values below 1024 are shown as plain bytes, values below 1024 * 1024 as KB
# rounded to one decimal, and everything else as MB rounded to two decimals.
#
# @param bytes [Integer] number of bytes
# @return [String] formatted string with units
def format_bytes(bytes)
  kilobyte = 1024
  megabyte = kilobyte * kilobyte

  return "#{bytes} bytes" if bytes < kilobyte
  return "#{(bytes / kilobyte.to_f).round(1)} KB" if bytes < megabyte

  "#{(bytes / megabyte.to_f).round(2)} MB"
end
391
+
392
# Validate the format of an llms.txt file
#
# Resolves the file path from the merged config/CLI options (falling back to
# 'llms.txt'), reads it and checks it with LlmDocsBuilder.validate. On failure
# the individual errors from a fresh Validator are listed before exiting.
#
# @param options [Hash] command options from parse_options
# @option options [String] :config path to config file
# @option options [String] :docs path to llms.txt file (defaults to 'llms.txt')
# @raise [SystemExit] exits with status 1 if file not found or invalid
def validate(options)
  merged_options = LlmDocsBuilder::Config.new(options[:config]).merge_with_options(options)
  file_path = merged_options[:docs] || 'llms.txt'

  unless File.exist?(file_path)
    puts "File not found: #{file_path}"
    exit 1
  end

  content = File.read(file_path)

  if LlmDocsBuilder.validate(content)
    puts 'Valid llms.txt file'
    return
  end

  puts 'Invalid llms.txt file'
  puts "\nErrors:"
  LlmDocsBuilder::Validator.new(content).errors.each { |problem| puts " - #{problem}" }
  exit 1
end
427
+
428
# Print the gem name and its version constant on one line
#
# @return [nil]
def show_version
  version_line = "llm-docs-builder version #{LlmDocsBuilder::VERSION}"
  puts version_line
end
433
+ end
434
+ end