llms-txt-ruby 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/llms-txt ADDED
@@ -0,0 +1,242 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'optparse'
5
+ require 'llms_txt'
6
+
7
module LlmsTxt
  # Command-line front end for the llms-txt executable.
  #
  # Takes an argument vector, extracts an optional leading subcommand and the
  # option flags, then dispatches to the matching private handler. Handlers
  # print their results and problems with `puts` and terminate the process
  # with exit status 1 when something goes wrong.
  class CLI
    # A subcommand token has to start with a lowercase letter. Requiring the
    # leading letter keeps option flags such as `-v`, `--help` or `--docs`
    # from being swallowed as the command, so they still reach OptionParser.
    COMMAND_PATTERN = /\A[a-z][a-z-]*\z/

    # Hint printed after usage-related failures.
    USAGE_HINT = "Run 'llms-txt --help' for usage information"

    private_constant :COMMAND_PATTERN, :USAGE_HINT

    # Convenience entry point: builds a CLI instance and runs it.
    #
    # @param argv [Array<String>] command-line arguments (consumed destructively)
    # @return [void]
    def self.run(argv = ARGV)
      new.run(argv)
    end

    # Parses the arguments and executes the requested subcommand.
    #
    # An absent subcommand is treated as `generate`. Option-parsing failures and
    # errors from the LlmsTxt error hierarchy are reported as "Error: ...";
    # anything else is reported as "Unexpected error: ..." (with a backtrace
    # when --verbose was parsed). Every failure path exits with status 1.
    #
    # @param argv [Array<String>] command-line arguments (consumed destructively)
    # @return [void]
    def run(argv)
      options = parse_options(argv)

      case options[:command]
      when 'generate', nil then generate(options)
      when 'transform' then transform(options)
      when 'bulk-transform' then bulk_transform(options)
      when 'parse' then parse(options)
      when 'validate' then validate(options)
      when 'version' then show_version
      else
        puts "Unknown command: #{options[:command]}"
        puts USAGE_HINT
        exit 1
      end
    rescue OptionParser::ParseError => e
      # Bad or incomplete flags are a usage problem, not an unexpected crash
      puts "Error: #{e.message}"
      puts USAGE_HINT
      exit 1
    rescue LlmsTxt::Errors::BaseError => e
      puts "Error: #{e.message}"
      exit 1
    rescue StandardError => e
      puts "Unexpected error: #{e.message}"
      # options is nil when the failure happened while parsing
      puts e.backtrace.join("\n") if options&.fetch(:verbose, false)
      exit 1
    end

    private

    # Splits the argument vector into a subcommand, option values and an
    # optional positional file path.
    #
    # @param argv [Array<String>] arguments; recognised tokens are removed
    # @return [Hash] :command plus any of :config, :docs, :output, :verbose, :file_path
    # @raise [OptionParser::ParseError] on unknown flags or missing flag arguments
    def parse_options(argv)
      options = { command: extract_command(argv) }
      build_option_parser(options).parse!(argv)

      # Whatever is left after flag parsing starts with the positional file path
      options[:file_path] = argv.first if argv.any?
      options
    end

    # Removes and returns the leading subcommand token, if there is one.
    #
    # @param argv [Array<String>] arguments; the first element is shifted off on a match
    # @return [String, nil] the subcommand, or nil when the first token is a flag or missing
    def extract_command(argv)
      argv.shift if argv.first&.match?(COMMAND_PATTERN)
    end

    # Builds the OptionParser that records flag values into +options+.
    #
    # @param options [Hash] hash the flag handlers write into
    # @return [OptionParser]
    def build_option_parser(options)
      OptionParser.new do |opts|
        opts.banner = "llms-txt - Simple tool for generating llms.txt from markdown documentation\n\nUsage: llms-txt [command] [options]\n\nFor advanced configuration (base_url, title, description, convert_urls), use a config file."

        opts.separator ''
        opts.separator 'Commands:'
        opts.separator ' generate Generate llms.txt from documentation (default)'
        opts.separator ' transform Transform a markdown file to be AI-friendly'
        opts.separator ' bulk-transform Transform all markdown files in directory'
        opts.separator ' parse Parse existing llms.txt file'
        opts.separator ' validate Validate llms.txt file'
        opts.separator ' version Show version'

        opts.separator ''
        opts.separator 'Options:'

        opts.on('-c', '--config PATH', 'Configuration file path (default: llms-txt.yml)') do |path|
          options[:config] = path
        end

        opts.on('-d', '--docs PATH', 'Path to documentation directory or file') do |path|
          options[:docs] = path
        end

        opts.on('-o', '--output PATH', 'Output file path') do |path|
          options[:output] = path
        end

        opts.on('-v', '--verbose', 'Verbose output') do
          options[:verbose] = true
        end

        opts.on('-h', '--help', 'Show this message') do
          puts opts
          exit
        end

        opts.on('--version', 'Show version') do
          show_version
          exit
        end
      end
    end

    # Loads the config file (explicit or auto-discovered) and overlays the
    # CLI options on top of it.
    #
    # @param options [Hash] parsed CLI options
    # @return [Hash] merged settings as produced by Config#merge_with_options
    def load_merged_options(options)
      LlmsTxt::Config.new(options[:config]).merge_with_options(options)
    end

    # Prints +message+ and terminates the process with exit status 1.
    #
    # @param message [String] text shown to the user
    # @return [void] never returns
    def abort_with(message)
      puts message
      exit 1
    end

    # Handler for `generate`: builds llms.txt content from the docs path and
    # writes it to the configured output file. In verbose mode the generated
    # content is additionally run through the validator.
    #
    # @param options [Hash] parsed CLI options
    # @return [void]
    def generate(options)
      merged_options = load_merged_options(options)
      docs_path = merged_options[:docs]

      abort_with("Documentation path not found: #{docs_path}") unless File.exist?(docs_path)

      puts "Generating llms.txt from #{docs_path}..." if merged_options[:verbose]

      content = LlmsTxt.generate_from_docs(docs_path, merged_options)
      output_path = merged_options[:output]

      File.write(output_path, content)
      puts "Successfully generated #{output_path}"

      return unless merged_options[:verbose]

      validator = LlmsTxt::Validator.new(content)
      if validator.valid?
        puts "Valid llms.txt format"
      else
        puts "Validation warnings:"
        validator.errors.each { |error| puts " - #{error}" }
      end
    end

    # Handler for `transform`: transforms one markdown file and either saves
    # the result or prints it.
    #
    # @param options [Hash] parsed CLI options; :file_path is required
    # @return [void]
    def transform(options)
      merged_options = load_merged_options(options)
      file_path = options[:file_path]

      abort_with("File path required for transform command") unless file_path
      abort_with("File not found: #{file_path}") unless File.exist?(file_path)

      puts "Transforming #{file_path}..." if merged_options[:verbose]

      content = LlmsTxt.transform_markdown(file_path, merged_options)
      output_path = merged_options[:output]

      # 'llms.txt' is the merged default output name, so it is treated as
      # "no output requested" and the content goes to standard output instead
      if output_path && output_path != 'llms.txt'
        File.write(output_path, content)
        puts "Transformed content saved to #{output_path}"
      else
        puts content
      end
    end

    # Handler for `bulk-transform`: transforms every markdown file below the
    # docs directory and lists the files that were written.
    #
    # @param options [Hash] parsed CLI options
    # @return [void]
    def bulk_transform(options)
      merged_options = load_merged_options(options)
      docs_path = merged_options[:docs]
      verbose = merged_options[:verbose]

      abort_with("Documentation path not found: #{docs_path}") unless File.exist?(docs_path)
      unless File.directory?(docs_path)
        abort_with("Path must be a directory for bulk transformation: #{docs_path}")
      end

      if verbose
        # Array() tolerates a config file that gives a single pattern as a string
        excludes = Array(merged_options[:excludes])
        puts "Bulk transforming markdown files in #{docs_path}..."
        puts "Using suffix: #{merged_options[:suffix]}"
        puts "Excludes: #{excludes.join(', ')}" unless excludes.empty?
      end

      begin
        transformed_files = LlmsTxt.bulk_transform(docs_path, merged_options)

        if transformed_files.empty?
          puts "No markdown files found to transform"
        else
          puts "Successfully transformed #{transformed_files.size} files:"
          # verbose mode already shows per-file progress
          transformed_files.each { |file| puts " #{file}" } unless verbose
        end
      rescue LlmsTxt::Errors::BaseError => e
        puts "Error during bulk transformation: #{e.message}"
        exit 1
      end
    end

    # Handler for `parse`: parses an llms.txt file and prints either a short
    # summary (verbose) or its XML form when the parsed object offers one.
    #
    # @param options [Hash] parsed CLI options; :file_path defaults to 'llms.txt'
    # @return [void]
    def parse(options)
      file_path = options[:file_path] || 'llms.txt'
      abort_with("File not found: #{file_path}") unless File.exist?(file_path)

      parsed = LlmsTxt.parse(file_path)

      if options[:verbose]
        puts "Title: #{parsed.title}"
        puts "Description: #{parsed.description}"
        puts "Documentation Links: #{parsed.documentation_links.size}"
        puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
        puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
      elsif parsed.respond_to?(:to_xml)
        puts parsed.to_xml
      end
    end

    # Handler for `validate`: checks an llms.txt file and exits with status 1,
    # after listing the validator's errors, when it is not valid.
    #
    # @param options [Hash] parsed CLI options; :file_path defaults to 'llms.txt'
    # @return [void]
    def validate(options)
      file_path = options[:file_path] || 'llms.txt'
      abort_with("File not found: #{file_path}") unless File.exist?(file_path)

      content = File.read(file_path)

      if LlmsTxt.validate(content)
        puts 'Valid llms.txt file'
      else
        puts 'Invalid llms.txt file'
        puts "\nErrors:"
        LlmsTxt::Validator.new(content).errors.each do |error|
          puts " - #{error}"
        end
        exit 1
      end
    end

    # Prints the gem version.
    #
    # @return [void]
    def show_version
      puts "llms-txt version #{LlmsTxt::VERSION}"
    end
  end
end
241
+
242
# Start the CLI with the process arguments. The call is intentionally not
# wrapped in an `if $PROGRAM_NAME == __FILE__` guard.
# NOTE(review): presumably because a generated executable wrapper loads this
# script, which would make such a guard false - confirm before reinstating it.
LlmsTxt::CLI.run(ARGV)
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fileutils'
+ require 'find'
4
+
5
module LlmsTxt
  # Bulk transforms multiple markdown files to be AI-friendly
  #
  # Walks a documentation directory recursively, runs every markdown file
  # through MarkdownTransformer and writes the result next to the original
  # with a suffix inserted before the extension. Exclusion patterns are
  # honoured, and files that already carry the suffix are skipped so that
  # repeated runs do not transform their own output.
  #
  # @example Transform all files in a directory
  #   transformer = LlmsTxt::BulkTransformer.new('./docs',
  #     base_url: 'https://myproject.io',
  #     suffix: '.llm'
  #   )
  #   transformer.transform_all
  #
  # @api public
  class BulkTransformer
    # @return [String] path to documentation directory
    attr_reader :docs_path

    # @return [Hash] transformation options
    attr_reader :options

    # Initialize a new bulk transformer
    #
    # @param docs_path [String] path to documentation directory
    # @param options [Hash] transformation options (also handed to MarkdownTransformer)
    # @option options [String] :base_url base URL for expanding relative links
    # @option options [Boolean] :convert_urls convert HTML URLs to markdown format
    # @option options [String] :suffix suffix for transformed files (default: '.llm')
    # @option options [Array<String>] :excludes glob patterns for files to exclude
    # @option options [Boolean] :verbose enable verbose output
    def initialize(docs_path, options = {})
      @docs_path = docs_path
      @options = { suffix: '.llm', excludes: [] }.merge(options)
    end

    # Transform all markdown files in the directory
    #
    # Each eligible file is transformed and written to the path returned by
    # {#generate_output_path}. Progress lines are printed when :verbose is set.
    #
    # @return [Array<String>] paths of the files that were written
    # @raise [Errors::GenerationError] when docs_path is not a directory
    def transform_all
      unless File.directory?(docs_path)
        raise Errors::GenerationError, "Directory not found: #{docs_path}"
      end

      sources = find_markdown_files.reject do |file_path|
        should_exclude?(file_path) || transformed_output?(file_path)
      end

      sources.map do |file_path|
        puts "Transforming #{file_path}..." if options[:verbose]

        output_path = generate_output_path(file_path)
        transformed_content = transform_file(file_path)

        # Ensure output directory exists
        FileUtils.mkdir_p(File.dirname(output_path))
        File.write(output_path, transformed_content)

        puts " → #{output_path}" if options[:verbose]
        output_path
      end
    end

    private

    # Recursively scans the docs directory for markdown files
    #
    # Keeps regular files whose name ends in .md (any letter case) and whose
    # basename does not start with a dot.
    #
    # @return [Array<String>] sorted paths to markdown files
    def find_markdown_files
      Find.find(docs_path).select { |path| markdown_source?(path) }.sort
    end

    # Tests whether a path is a visible markdown file
    #
    # @param path [String] path yielded by the directory walk
    # @return [Boolean] true for non-hidden regular files ending in .md
    def markdown_source?(path)
      File.file?(path) &&
        path.match?(/\.md\z/i) &&
        !File.basename(path).start_with?('.')
    end

    # Tests whether a file looks like the output of an earlier run
    #
    # A file counts as already transformed when its name, minus the
    # extension, ends with the configured suffix (README.llm.md for '.llm').
    # With an empty suffix nothing is skipped.
    #
    # @param file_path [String] path to check
    # @return [Boolean] true if the file already carries the suffix
    def transformed_output?(file_path)
      suffix = options[:suffix].to_s
      return false if suffix.empty?

      File.basename(file_path, File.extname(file_path)).end_with?(suffix)
    end

    # Tests if file matches any exclusion pattern from options
    #
    # Uses File.fnmatch with pathname and dotmatch flags
    #
    # @param file_path [String] path to check
    # @return [Boolean] true if file should be excluded
    def should_exclude?(file_path)
      match_flags = File::FNM_PATHNAME | File::FNM_DOTMATCH

      Array(options[:excludes]).any? do |pattern|
        File.fnmatch(pattern, file_path, match_flags)
      end
    end

    # Applies markdown transformations to a single file
    #
    # Creates MarkdownTransformer instance and delegates transformation
    #
    # @param file_path [String] path to markdown file
    # @return [String] transformed content
    def transform_file(file_path)
      MarkdownTransformer.new(file_path, options).transform
    end

    # Constructs output path by inserting the suffix before the extension
    #
    # The file's own extension is stripped and re-appended unchanged, so the
    # result is correct whatever the letter case of the extension:
    # README.md becomes README.llm.md and GUIDE.MD becomes GUIDE.llm.MD.
    #
    # @param input_path [String] original file path
    # @return [String] path for transformed file
    def generate_output_path(input_path)
      extension = File.extname(input_path)
      stem = File.basename(input_path, extension)

      File.join(File.dirname(input_path), "#{stem}#{options[:suffix]}#{extension}")
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
module LlmsTxt
  # Simple configuration loader for llms-txt.yml files
  #
  # Reads a YAML file into a Hash and exposes its values by key. When no file
  # is given, a small list of conventional names is probed in the current
  # directory. A missing file is not an error: the configuration is then empty.
  #
  # @example Load default config file
  #   config = LlmsTxt::Config.new
  #
  # @example Load specific config file
  #   config = LlmsTxt::Config.new('my-config.yml')
  #
  # @example Access config values
  #   config['base_url'] # => "https://myproject.io"
  #   config.dig('output') # => "llms.txt"
  #
  # @api public
  class Config
    # File names probed in the current directory, in order of preference
    CANDIDATE_FILES = ['llms-txt.yml', 'llms-txt.yaml', '.llms-txt.yml'].freeze
    private_constant :CANDIDATE_FILES

    # @return [Hash] the loaded configuration data
    attr_reader :data

    # Initialize a new configuration loader
    #
    # @param config_file [String, nil] path to YAML config file, or nil to auto-find
    # @raise [Errors::GenerationError] if the file cannot be parsed or is not a mapping
    def initialize(config_file = nil)
      @config_file = config_file || find_config_file
      @data = load_config
    end

    # Access configuration value by key
    #
    # @param key [String, Symbol] configuration key (looked up as a String)
    # @return [Object, nil] configuration value or nil if not found
    def [](key)
      data[key.to_s]
    end

    # Access nested configuration values
    #
    # @param keys [Array<String, Symbol>] nested keys to access (each looked up as a String)
    # @return [Object, nil] configuration value or nil if not found
    def dig(*keys)
      data.dig(*keys.map(&:to_s))
    end

    # Merge config file values with CLI options
    #
    # CLI options take precedence over config file values. Config file provides
    # defaults for any options not specified via CLI, and built-in defaults
    # apply when neither supplies a value.
    #
    # @param options [Hash] CLI options hash (Symbol keys)
    # @return [Hash] merged configuration with CLI overrides applied
    def merge_with_options(options)
      {
        docs: options[:docs] || self['docs'] || '.',
        base_url: options[:base_url] || self['base_url'],
        title: options[:title] || self['title'],
        description: options[:description] || self['description'],
        output: options[:output] || self['output'] || 'llms.txt',
        convert_urls: flag_value(options, :convert_urls),
        verbose: flag_value(options, :verbose),
        # Bulk transformation options
        suffix: options[:suffix] || self['suffix'] || '.llm',
        # Array() lets a config file give a single pattern as a plain string
        excludes: Array(options[:excludes] || self['excludes']),
        bulk: flag_value(options, :bulk)
      }
    end

    # Check if a config file was found and exists
    #
    # @return [Boolean] true if config file exists, false otherwise
    def exists?
      !@config_file.nil? && File.exist?(@config_file)
    end

    private

    # Resolves a boolean setting
    #
    # The mere presence of the key in the CLI options wins, so an explicit
    # false on the command line overrides a true in the config file.
    #
    # @param options [Hash] CLI options hash
    # @param key [Symbol] setting name
    # @return [Object] CLI value if the key is present, else config value, else false
    def flag_value(options, key)
      return options[key] if options.key?(key)

      self[key] || false
    end

    # Find config file in current directory
    #
    # Looks for config files in order of preference:
    # 1. llms-txt.yml
    # 2. llms-txt.yaml
    # 3. .llms-txt.yml
    #
    # @return [String, nil] path to config file or nil if none found
    def find_config_file
      CANDIDATE_FILES.find { |file| File.exist?(file) }
    end

    # Load and parse YAML config file
    #
    # An empty file yields an empty hash. Any other top-level value that is
    # not a mapping is rejected, because every accessor here treats the data
    # as a Hash.
    #
    # @return [Hash] parsed config data, empty hash if no file
    # @raise [Errors::GenerationError] if YAML is invalid, the file cannot be
    #   read, or its top level is not a mapping
    def load_config
      return {} unless exists?

      loaded = read_config_file || {}
      return loaded if loaded.is_a?(Hash)

      raise Errors::GenerationError,
            "Invalid config file #{@config_file}: expected a YAML mapping at the top level, got #{loaded.class}"
    end

    # Reads the YAML document, translating failures into GenerationError
    #
    # NOTE(review): YAML.load_file is only safe by default on newer Psych
    # releases; on older ones it can build arbitrary objects. Config files are
    # presumably trusted project files - confirm before loading foreign input.
    #
    # @return [Object] the parsed document (false or nil for an empty file)
    # @raise [Errors::GenerationError] if YAML is invalid or file cannot be read
    def read_config_file
      YAML.load_file(@config_file)
    rescue Psych::SyntaxError => e
      raise Errors::GenerationError, "Invalid YAML in config file #{@config_file}: #{e.message}"
    rescue StandardError => e
      raise Errors::GenerationError, "Failed to load config file #{@config_file}: #{e.message}"
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module LlmsTxt
  # Home of every exception class that LlmsTxt defines for itself
  module Errors
    # Common ancestor of the library's own errors; rescuing it covers
    # both of the classes defined below
    class BaseError < StandardError; end

    # Signals that generating or transforming content could not proceed:
    # a missing directory, unparseable YAML, or a config file that could
    # not be loaded
    #
    # @example Directory that does not exist
    #   LlmsTxt::BulkTransformer.new('/nonexistent/path').transform_all
    #   # => raises GenerationError: "Directory not found: /nonexistent/path"
    #
    # @example Config file with invalid YAML
    #   LlmsTxt::Config.new('invalid.yml')
    #   # => raises GenerationError: "Invalid YAML in config file..."
    class GenerationError < BaseError; end

    # Reserved for failures of llms.txt content validation
    #
    # Declared for validation that raises; the validation calls visible in
    # this package report a boolean result plus an error list instead.
    #
    # @example Possible future usage (a raising validator)
    #   LlmsTxt.validate!(invalid_content)
    #   # => raises ValidationError: "Missing required H1 title"
    class ValidationError < BaseError; end
  end
end
+ end