llms-txt-ruby 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +20 -0
- data/Gemfile.lock +1 -1
- data/README.md +163 -0
- data/bin/llms-txt +2 -237
- data/lib/llms_txt/cli.rb +238 -0
- data/lib/llms_txt/version.rb +1 -1
- data/lib/llms_txt.rb +1 -0
- data/llms-txt-ruby.gemspec +9 -7
- metadata +12 -9
- data/mise.toml +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 678093f3878f75c9f39c21f009b279e95e20c62a7dcbbc72de5499305a73bf6f
|
4
|
+
data.tar.gz: a2406c0e8fada1bf1e0134acae4adfe3859d1af83d4d5286cb4207d2dde92f64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc062480b41973112b8e0e860b1605cd4b51a3f9d8b41af490fee57ae3b286722dcfac1fe07bfe80baca624647dde37e7fffbf7f2a34e9d2ca2b32b25550f2e1
|
7
|
+
data.tar.gz: b7600d18d1ecded87cd64c6e5022a20915abe02d716fd53d033abfd35136b7514b4b7f7c4e1b9e3f9018cb6c2358ac0692c4cdd5b8d115788e8287d836af5268
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.4.
|
1
|
+
3.4.6
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## 0.1.2 (2025-10-07)
|
4
|
+
- [Fix] Fixed CLI error handling to use the correct `LlmsTxt::Errors::BaseError` instead of the non-existent `LlmsTxt::Error`.
|
5
|
+
- [Enhancement] Extracted CLI class to `lib/llms_txt/cli.rb` for better testability.
|
6
|
+
- [Enhancement] Added comprehensive CLI error handling specs.
|
7
|
+
|
8
|
+
## 0.1.1 (2025-10-07)
|
9
|
+
- [Change] Updated repository metadata to use `master` branch instead of `main`.
|
10
|
+
|
11
|
+
## 0.1.0 (2025-10-07)
|
12
|
+
- [Feature] Generate `llms.txt` files from markdown documentation.
|
13
|
+
- [Feature] Transform individual markdown files to be AI-friendly.
|
14
|
+
- [Feature] Bulk transformation of entire documentation directories.
|
15
|
+
- [Feature] CLI with commands: `generate`, `transform`, `bulk-transform`, `parse`, `validate`.
|
16
|
+
- [Feature] Configuration file support (`llms-txt.yml`).
|
17
|
+
- [Feature] Automatic link expansion from relative to absolute URLs.
|
18
|
+
- [Feature] File prioritization (README first, then guides, APIs, etc.).
|
19
|
+
- [Feature] Exclusion patterns for bulk transformations.
|
20
|
+
- [Feature] Ruby API for programmatic usage.
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -213,6 +213,169 @@ wiki/
|
|
213
213
|
└── internal.md ← Excluded, no .llm.md version
|
214
214
|
```
|
215
215
|
|
216
|
+
## Serving LLM-Friendly Documentation
|
217
|
+
|
218
|
+
After using `bulk-transform` to create `.llm.md` versions of your documentation, you can configure your web server to automatically serve these LLM-optimized versions to AI bots while showing the original versions to human visitors.
|
219
|
+
|
220
|
+
### How It Works
|
221
|
+
|
222
|
+
The strategy is simple:
|
223
|
+
|
224
|
+
1. **Detect AI bots** by their User-Agent strings
|
225
|
+
2. **Serve `.llm.md` files** to detected AI bots
|
226
|
+
3. **Serve original `.md` files** to human visitors
|
227
|
+
4. **Automatic selection** - no manual switching needed
|
228
|
+
|
229
|
+
### Apache Configuration
|
230
|
+
|
231
|
+
Add this to your `.htaccess` file:
|
232
|
+
|
233
|
+
```apache
|
234
|
+
# Detect LLM bots by User-Agent
|
235
|
+
SetEnvIf User-Agent "(?i)(openai|anthropic|claude|gpt|chatgpt|bard|gemini|copilot)" IS_LLM_BOT
|
236
|
+
SetEnvIf User-Agent "(?i)(perplexity|character\.ai|you\.com|poe\.com|huggingface|replicate)" IS_LLM_BOT
|
237
|
+
SetEnvIf User-Agent "(?i)(langchain|llamaindex|semantic|embedding|vector|rag)" IS_LLM_BOT
|
238
|
+
SetEnvIf User-Agent "(?i)(ollama|mistral|cohere|together|fireworks|groq)" IS_LLM_BOT
|
239
|
+
|
240
|
+
# Serve .md files as text/plain
|
241
|
+
<FilesMatch "\.md$">
|
242
|
+
Header set Content-Type "text/plain; charset=utf-8"
|
243
|
+
ForceType text/plain
|
244
|
+
</FilesMatch>
|
245
|
+
|
246
|
+
# Enable rewrite engine
|
247
|
+
RewriteEngine On
|
248
|
+
|
249
|
+
# For LLM bots: rewrite requests to serve .llm.md versions
|
250
|
+
RewriteCond %{ENV:IS_LLM_BOT} !^$
|
251
|
+
RewriteCond %{REQUEST_URI} ^/docs/.*\.md$ [NC]
|
252
|
+
RewriteCond %{REQUEST_URI} !\.llm\.md$ [NC]
|
253
|
+
RewriteCond %{DOCUMENT_ROOT}%{REQUEST_URI} -f
|
254
|
+
RewriteRule ^(.*)\.md$ $1.llm.md [L]
|
255
|
+
|
256
|
+
# For LLM bots: handle clean URLs by appending .llm.md
|
257
|
+
RewriteCond %{ENV:IS_LLM_BOT} !^$
|
258
|
+
RewriteCond %{REQUEST_URI} ^/docs/ [NC]
|
259
|
+
RewriteCond %{REQUEST_URI} !\.md$ [NC]
|
260
|
+
RewriteCond %{DOCUMENT_ROOT}%{REQUEST_URI}.llm.md -f
|
261
|
+
RewriteRule ^(.*)$ $1.llm.md [L]
|
262
|
+
|
263
|
+
# For regular users: serve original .md files or clean URLs as usual
|
264
|
+
# (add your normal URL handling rules here)
|
265
|
+
```
|
266
|
+
|
267
|
+
### Nginx Configuration
|
268
|
+
|
269
|
+
Add the `map` block to the `http` context of your nginx configuration and the `location` blocks to your `server` block:
|
270
|
+
|
271
|
+
```nginx
|
272
|
+
# Map to detect LLM bots
|
273
|
+
map $http_user_agent $is_llm_bot {
|
274
|
+
default 0;
|
275
|
+
"~*(?i)(openai|anthropic|claude|gpt|chatgpt|bard|gemini|copilot)" 1;
|
276
|
+
"~*(?i)(perplexity|character\.ai|you\.com|poe\.com|huggingface|replicate)" 1;
|
277
|
+
"~*(?i)(langchain|llamaindex|semantic|embedding|vector|rag)" 1;
|
278
|
+
"~*(?i)(ollama|mistral|cohere|together|fireworks|groq)" 1;
|
279
|
+
}
|
280
|
+
|
281
|
+
server {
|
282
|
+
# ... your server configuration ...
|
283
|
+
|
284
|
+
# Serve .md files as text/plain
|
285
|
+
location ~ \.md$ {
|
286
|
+
default_type text/plain;
|
287
|
+
charset utf-8;
|
288
|
+
}
|
289
|
+
|
290
|
+
# For LLM bots requesting .md files, serve .llm.md version
|
291
|
+
location ~ ^/docs/(.*)\.md$ {
|
292
|
+
if ($is_llm_bot) {
|
293
|
+
rewrite ^(.*)\.md$ $1.llm.md last;
|
294
|
+
}
|
295
|
+
try_files $uri $uri/ =404;
|
296
|
+
}
|
297
|
+
|
298
|
+
# For LLM bots requesting clean URLs, serve .llm.md version
|
299
|
+
location ~ ^/docs/ {
|
300
|
+
if ($is_llm_bot) {
|
301
|
+
try_files $uri.llm.md $uri $uri/ =404;
|
302
|
+
}
|
303
|
+
try_files $uri $uri.md $uri/ =404;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
```
|
307
|
+
|
308
|
+
### Cloudflare Workers
|
309
|
+
|
310
|
+
For serverless deployments, use Cloudflare Workers:
|
311
|
+
|
312
|
+
```javascript
|
313
|
+
export default {
|
314
|
+
async fetch(request) {
|
315
|
+
const url = new URL(request.url);
|
316
|
+
const userAgent = request.headers.get('user-agent') || '';
|
317
|
+
|
318
|
+
// Detect LLM bots
|
319
|
+
const llmBotPatterns = [
|
320
|
+
/openai|anthropic|claude|gpt|chatgpt|bard|gemini|copilot/i,
|
321
|
+
/perplexity|character\.ai|you\.com|poe\.com|huggingface|replicate/i,
|
322
|
+
/langchain|llamaindex|semantic|embedding|vector|rag/i,
|
323
|
+
/ollama|mistral|cohere|together|fireworks|groq/i
|
324
|
+
];
|
325
|
+
|
326
|
+
const isLLMBot = llmBotPatterns.some(pattern => pattern.test(userAgent));
|
327
|
+
|
328
|
+
// If LLM bot and requesting docs
|
329
|
+
if (isLLMBot && url.pathname.startsWith('/docs/')) {
|
330
|
+
// Try to serve .llm.md version
|
331
|
+
const llmPath = url.pathname.replace(/\.md$/, '.llm.md');
|
332
|
+
if (!url.pathname.endsWith('.llm.md')) {
|
333
|
+
url.pathname = llmPath;
|
334
|
+
}
|
335
|
+
}
|
336
|
+
|
337
|
+
return fetch(url);
|
338
|
+
}
|
339
|
+
}
|
340
|
+
```
|
341
|
+
|
342
|
+
### Custom Suffix
|
343
|
+
|
344
|
+
If you used a different suffix with the `bulk-transform` command (e.g., `--suffix .ai`), update your web server rules accordingly.
|
345
|
+
|
346
|
+
**Apache:**
|
347
|
+
```apache
|
348
|
+
RewriteRule ^(.*)\.md$ $1.ai.md [L]
|
349
|
+
```
|
350
|
+
|
351
|
+
**Nginx:**
|
352
|
+
```nginx
|
353
|
+
rewrite ^(.*)\.md$ $1.ai.md last;
|
354
|
+
```
|
355
|
+
|
356
|
+
**Cloudflare Workers:**
|
357
|
+
```javascript
|
358
|
+
const llmPath = url.pathname.replace(/\.md$/, '.ai.md');
|
359
|
+
```
|
360
|
+
|
361
|
+
### Example Setup
|
362
|
+
|
363
|
+
```yaml
|
364
|
+
# llms-txt.yml
|
365
|
+
docs: ./docs
|
366
|
+
base_url: https://myproject.io
|
367
|
+
suffix: .llm
|
368
|
+
convert_urls: true
|
369
|
+
```
|
370
|
+
|
371
|
+
```bash
|
372
|
+
# Generate LLM-friendly versions
|
373
|
+
llms-txt bulk-transform --config llms-txt.yml
|
374
|
+
|
375
|
+
# Deploy both original and .llm.md files to your web server
|
376
|
+
# The server will automatically serve the right version to each visitor
|
377
|
+
```
|
378
|
+
|
216
379
|
## Ruby API
|
217
380
|
|
218
381
|
### Basic Usage
|
data/bin/llms-txt
CHANGED
@@ -1,242 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
require 'optparse'
|
5
4
|
require 'llms_txt'
|
5
|
+
require 'llms_txt/cli'
|
6
6
|
|
7
|
-
|
8
|
-
class CLI
|
9
|
-
def self.run(argv = ARGV)
|
10
|
-
new.run(argv)
|
11
|
-
end
|
12
|
-
|
13
|
-
def run(argv)
|
14
|
-
options = parse_options(argv)
|
15
|
-
|
16
|
-
case options[:command]
|
17
|
-
when 'generate', nil
|
18
|
-
generate(options)
|
19
|
-
when 'transform'
|
20
|
-
transform(options)
|
21
|
-
when 'bulk-transform'
|
22
|
-
bulk_transform(options)
|
23
|
-
when 'parse'
|
24
|
-
parse(options)
|
25
|
-
when 'validate'
|
26
|
-
validate(options)
|
27
|
-
when 'version'
|
28
|
-
show_version
|
29
|
-
else
|
30
|
-
puts "Unknown command: #{options[:command]}"
|
31
|
-
puts "Run 'llms-txt --help' for usage information"
|
32
|
-
exit 1
|
33
|
-
end
|
34
|
-
rescue LlmsTxt::Error => e
|
35
|
-
puts "Error: #{e.message}"
|
36
|
-
exit 1
|
37
|
-
rescue StandardError => e
|
38
|
-
puts "Unexpected error: #{e.message}"
|
39
|
-
puts e.backtrace.join("\n") if options&.fetch(:verbose, false)
|
40
|
-
exit 1
|
41
|
-
end
|
42
|
-
|
43
|
-
private
|
44
|
-
|
45
|
-
def parse_options(argv)
|
46
|
-
options = {
|
47
|
-
command: argv.first&.match?(/^[a-z-]+$/) ? argv.shift : nil
|
48
|
-
}
|
49
|
-
|
50
|
-
OptionParser.new do |opts|
|
51
|
-
opts.banner = "llms-txt - Simple tool for generating llms.txt from markdown documentation\n\nUsage: llms-txt [command] [options]\n\nFor advanced configuration (base_url, title, description, convert_urls), use a config file."
|
52
|
-
|
53
|
-
opts.separator ''
|
54
|
-
opts.separator 'Commands:'
|
55
|
-
opts.separator ' generate Generate llms.txt from documentation (default)'
|
56
|
-
opts.separator ' transform Transform a markdown file to be AI-friendly'
|
57
|
-
opts.separator ' bulk-transform Transform all markdown files in directory'
|
58
|
-
opts.separator ' parse Parse existing llms.txt file'
|
59
|
-
opts.separator ' validate Validate llms.txt file'
|
60
|
-
opts.separator ' version Show version'
|
61
|
-
|
62
|
-
opts.separator ''
|
63
|
-
opts.separator 'Options:'
|
64
|
-
|
65
|
-
opts.on('-c', '--config PATH', 'Configuration file path (default: llms-txt.yml)') do |path|
|
66
|
-
options[:config] = path
|
67
|
-
end
|
68
|
-
|
69
|
-
opts.on('-d', '--docs PATH', 'Path to documentation directory or file') do |path|
|
70
|
-
options[:docs] = path
|
71
|
-
end
|
72
|
-
|
73
|
-
opts.on('-o', '--output PATH', 'Output file path') do |path|
|
74
|
-
options[:output] = path
|
75
|
-
end
|
76
|
-
|
77
|
-
opts.on('-v', '--verbose', 'Verbose output') do
|
78
|
-
options[:verbose] = true
|
79
|
-
end
|
80
|
-
|
81
|
-
opts.on('-h', '--help', 'Show this message') do
|
82
|
-
puts opts
|
83
|
-
exit
|
84
|
-
end
|
85
|
-
|
86
|
-
opts.on('--version', 'Show version') do
|
87
|
-
show_version
|
88
|
-
exit
|
89
|
-
end
|
90
|
-
end.parse!(argv)
|
91
|
-
|
92
|
-
options[:file_path] = argv.first if argv.any?
|
93
|
-
options
|
94
|
-
end
|
95
|
-
|
96
|
-
def generate(options)
|
97
|
-
# Load config and merge with CLI options
|
98
|
-
config = LlmsTxt::Config.new(options[:config])
|
99
|
-
merged_options = config.merge_with_options(options)
|
100
|
-
|
101
|
-
docs_path = merged_options[:docs]
|
102
|
-
|
103
|
-
unless File.exist?(docs_path)
|
104
|
-
puts "Documentation path not found: #{docs_path}"
|
105
|
-
exit 1
|
106
|
-
end
|
107
|
-
|
108
|
-
puts "Generating llms.txt from #{docs_path}..." if merged_options[:verbose]
|
109
|
-
|
110
|
-
content = LlmsTxt.generate_from_docs(docs_path, merged_options)
|
111
|
-
output_path = merged_options[:output]
|
112
|
-
|
113
|
-
File.write(output_path, content)
|
114
|
-
puts "Successfully generated #{output_path}"
|
115
|
-
|
116
|
-
if merged_options[:verbose]
|
117
|
-
validator = LlmsTxt::Validator.new(content)
|
118
|
-
if validator.valid?
|
119
|
-
puts "Valid llms.txt format"
|
120
|
-
else
|
121
|
-
puts "Validation warnings:"
|
122
|
-
validator.errors.each { |error| puts " - #{error}" }
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
def transform(options)
|
128
|
-
# Load config and merge with CLI options
|
129
|
-
config = LlmsTxt::Config.new(options[:config])
|
130
|
-
merged_options = config.merge_with_options(options)
|
131
|
-
|
132
|
-
file_path = options[:file_path]
|
133
|
-
|
134
|
-
unless file_path
|
135
|
-
puts "File path required for transform command"
|
136
|
-
exit 1
|
137
|
-
end
|
138
|
-
|
139
|
-
unless File.exist?(file_path)
|
140
|
-
puts "File not found: #{file_path}"
|
141
|
-
exit 1
|
142
|
-
end
|
143
|
-
|
144
|
-
puts "Transforming #{file_path}..." if merged_options[:verbose]
|
145
|
-
|
146
|
-
content = LlmsTxt.transform_markdown(file_path, merged_options)
|
147
|
-
|
148
|
-
if merged_options[:output] && merged_options[:output] != 'llms.txt'
|
149
|
-
File.write(merged_options[:output], content)
|
150
|
-
puts "Transformed content saved to #{merged_options[:output]}"
|
151
|
-
else
|
152
|
-
puts content
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
def bulk_transform(options)
|
157
|
-
# Load config and merge with CLI options
|
158
|
-
config = LlmsTxt::Config.new(options[:config])
|
159
|
-
merged_options = config.merge_with_options(options)
|
160
|
-
|
161
|
-
docs_path = merged_options[:docs]
|
162
|
-
|
163
|
-
unless File.exist?(docs_path)
|
164
|
-
puts "Documentation path not found: #{docs_path}"
|
165
|
-
exit 1
|
166
|
-
end
|
167
|
-
|
168
|
-
unless File.directory?(docs_path)
|
169
|
-
puts "Path must be a directory for bulk transformation: #{docs_path}"
|
170
|
-
exit 1
|
171
|
-
end
|
172
|
-
|
173
|
-
puts "Bulk transforming markdown files in #{docs_path}..." if merged_options[:verbose]
|
174
|
-
puts "Using suffix: #{merged_options[:suffix]}" if merged_options[:verbose]
|
175
|
-
puts "Excludes: #{merged_options[:excludes].join(', ')}" if merged_options[:verbose] && !merged_options[:excludes].empty?
|
176
|
-
|
177
|
-
begin
|
178
|
-
transformed_files = LlmsTxt.bulk_transform(docs_path, merged_options)
|
179
|
-
|
180
|
-
if transformed_files.empty?
|
181
|
-
puts "No markdown files found to transform"
|
182
|
-
else
|
183
|
-
puts "Successfully transformed #{transformed_files.size} files:"
|
184
|
-
transformed_files.each { |file| puts " #{file}" } unless merged_options[:verbose] # verbose mode already shows progress
|
185
|
-
end
|
186
|
-
rescue LlmsTxt::Error => e
|
187
|
-
puts "Error during bulk transformation: #{e.message}"
|
188
|
-
exit 1
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
def parse(options)
|
193
|
-
file_path = options[:file_path] || 'llms.txt'
|
194
|
-
|
195
|
-
unless File.exist?(file_path)
|
196
|
-
puts "File not found: #{file_path}"
|
197
|
-
exit 1
|
198
|
-
end
|
199
|
-
|
200
|
-
parsed = LlmsTxt.parse(file_path)
|
201
|
-
|
202
|
-
if options[:verbose]
|
203
|
-
puts "Title: #{parsed.title}"
|
204
|
-
puts "Description: #{parsed.description}"
|
205
|
-
puts "Documentation Links: #{parsed.documentation_links.size}"
|
206
|
-
puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
|
207
|
-
puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
|
208
|
-
else
|
209
|
-
puts parsed.to_xml if parsed.respond_to?(:to_xml)
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
def validate(options)
|
214
|
-
file_path = options[:file_path] || 'llms.txt'
|
215
|
-
|
216
|
-
unless File.exist?(file_path)
|
217
|
-
puts "File not found: #{file_path}"
|
218
|
-
exit 1
|
219
|
-
end
|
220
|
-
|
221
|
-
content = File.read(file_path)
|
222
|
-
valid = LlmsTxt.validate(content)
|
223
|
-
|
224
|
-
if valid
|
225
|
-
puts 'Valid llms.txt file'
|
226
|
-
else
|
227
|
-
puts 'Invalid llms.txt file'
|
228
|
-
puts "\nErrors:"
|
229
|
-
LlmsTxt::Validator.new(content).errors.each do |error|
|
230
|
-
puts " - #{error}"
|
231
|
-
end
|
232
|
-
exit 1
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
def show_version
|
237
|
-
puts "llms-txt version #{LlmsTxt::VERSION}"
|
238
|
-
end
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
LlmsTxt::CLI.run # if $PROGRAM_NAME == __FILE__
|
7
|
+
LlmsTxt::CLI.run
|
data/lib/llms_txt/cli.rb
ADDED
@@ -0,0 +1,238 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
|
5
|
+
module LlmsTxt
|
6
|
+
class CLI
|
7
|
+
def self.run(argv = ARGV)
|
8
|
+
new.run(argv)
|
9
|
+
end
|
10
|
+
|
11
|
+
def run(argv)
|
12
|
+
options = parse_options(argv)
|
13
|
+
|
14
|
+
case options[:command]
|
15
|
+
when 'generate', nil
|
16
|
+
generate(options)
|
17
|
+
when 'transform'
|
18
|
+
transform(options)
|
19
|
+
when 'bulk-transform'
|
20
|
+
bulk_transform(options)
|
21
|
+
when 'parse'
|
22
|
+
parse(options)
|
23
|
+
when 'validate'
|
24
|
+
validate(options)
|
25
|
+
when 'version'
|
26
|
+
show_version
|
27
|
+
else
|
28
|
+
puts "Unknown command: #{options[:command]}"
|
29
|
+
puts "Run 'llms-txt --help' for usage information"
|
30
|
+
exit 1
|
31
|
+
end
|
32
|
+
rescue LlmsTxt::Errors::BaseError => e
|
33
|
+
puts "Error: #{e.message}"
|
34
|
+
exit 1
|
35
|
+
rescue StandardError => e
|
36
|
+
puts "Unexpected error: #{e.message}"
|
37
|
+
puts e.backtrace.join("\n") if options&.fetch(:verbose, false)
|
38
|
+
exit 1
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def parse_options(argv)
|
44
|
+
options = {
|
45
|
+
command: argv.first&.match?(/^[a-z-]+$/) ? argv.shift : nil
|
46
|
+
}
|
47
|
+
|
48
|
+
OptionParser.new do |opts|
|
49
|
+
opts.banner = "llms-txt - Simple tool for generating llms.txt from markdown documentation\n\nUsage: llms-txt [command] [options]\n\nFor advanced configuration (base_url, title, description, convert_urls), use a config file."
|
50
|
+
|
51
|
+
opts.separator ''
|
52
|
+
opts.separator 'Commands:'
|
53
|
+
opts.separator ' generate Generate llms.txt from documentation (default)'
|
54
|
+
opts.separator ' transform Transform a markdown file to be AI-friendly'
|
55
|
+
opts.separator ' bulk-transform Transform all markdown files in directory'
|
56
|
+
opts.separator ' parse Parse existing llms.txt file'
|
57
|
+
opts.separator ' validate Validate llms.txt file'
|
58
|
+
opts.separator ' version Show version'
|
59
|
+
|
60
|
+
opts.separator ''
|
61
|
+
opts.separator 'Options:'
|
62
|
+
|
63
|
+
opts.on('-c', '--config PATH', 'Configuration file path (default: llms-txt.yml)') do |path|
|
64
|
+
options[:config] = path
|
65
|
+
end
|
66
|
+
|
67
|
+
opts.on('-d', '--docs PATH', 'Path to documentation directory or file') do |path|
|
68
|
+
options[:docs] = path
|
69
|
+
end
|
70
|
+
|
71
|
+
opts.on('-o', '--output PATH', 'Output file path') do |path|
|
72
|
+
options[:output] = path
|
73
|
+
end
|
74
|
+
|
75
|
+
opts.on('-v', '--verbose', 'Verbose output') do
|
76
|
+
options[:verbose] = true
|
77
|
+
end
|
78
|
+
|
79
|
+
opts.on('-h', '--help', 'Show this message') do
|
80
|
+
puts opts
|
81
|
+
exit
|
82
|
+
end
|
83
|
+
|
84
|
+
opts.on('--version', 'Show version') do
|
85
|
+
show_version
|
86
|
+
exit
|
87
|
+
end
|
88
|
+
end.parse!(argv)
|
89
|
+
|
90
|
+
options[:file_path] = argv.first if argv.any?
|
91
|
+
options
|
92
|
+
end
|
93
|
+
|
94
|
+
def generate(options)
|
95
|
+
# Load config and merge with CLI options
|
96
|
+
config = LlmsTxt::Config.new(options[:config])
|
97
|
+
merged_options = config.merge_with_options(options)
|
98
|
+
|
99
|
+
docs_path = merged_options[:docs]
|
100
|
+
|
101
|
+
unless File.exist?(docs_path)
|
102
|
+
puts "Documentation path not found: #{docs_path}"
|
103
|
+
exit 1
|
104
|
+
end
|
105
|
+
|
106
|
+
puts "Generating llms.txt from #{docs_path}..." if merged_options[:verbose]
|
107
|
+
|
108
|
+
content = LlmsTxt.generate_from_docs(docs_path, merged_options)
|
109
|
+
output_path = merged_options[:output]
|
110
|
+
|
111
|
+
File.write(output_path, content)
|
112
|
+
puts "Successfully generated #{output_path}"
|
113
|
+
|
114
|
+
if merged_options[:verbose]
|
115
|
+
validator = LlmsTxt::Validator.new(content)
|
116
|
+
if validator.valid?
|
117
|
+
puts "Valid llms.txt format"
|
118
|
+
else
|
119
|
+
puts "Validation warnings:"
|
120
|
+
validator.errors.each { |error| puts " - #{error}" }
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
def transform(options)
|
126
|
+
# Load config and merge with CLI options
|
127
|
+
config = LlmsTxt::Config.new(options[:config])
|
128
|
+
merged_options = config.merge_with_options(options)
|
129
|
+
|
130
|
+
file_path = options[:file_path]
|
131
|
+
|
132
|
+
unless file_path
|
133
|
+
puts "File path required for transform command"
|
134
|
+
exit 1
|
135
|
+
end
|
136
|
+
|
137
|
+
unless File.exist?(file_path)
|
138
|
+
puts "File not found: #{file_path}"
|
139
|
+
exit 1
|
140
|
+
end
|
141
|
+
|
142
|
+
puts "Transforming #{file_path}..." if merged_options[:verbose]
|
143
|
+
|
144
|
+
content = LlmsTxt.transform_markdown(file_path, merged_options)
|
145
|
+
|
146
|
+
if merged_options[:output] && merged_options[:output] != 'llms.txt'
|
147
|
+
File.write(merged_options[:output], content)
|
148
|
+
puts "Transformed content saved to #{merged_options[:output]}"
|
149
|
+
else
|
150
|
+
puts content
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def bulk_transform(options)
|
155
|
+
# Load config and merge with CLI options
|
156
|
+
config = LlmsTxt::Config.new(options[:config])
|
157
|
+
merged_options = config.merge_with_options(options)
|
158
|
+
|
159
|
+
docs_path = merged_options[:docs]
|
160
|
+
|
161
|
+
unless File.exist?(docs_path)
|
162
|
+
puts "Documentation path not found: #{docs_path}"
|
163
|
+
exit 1
|
164
|
+
end
|
165
|
+
|
166
|
+
unless File.directory?(docs_path)
|
167
|
+
puts "Path must be a directory for bulk transformation: #{docs_path}"
|
168
|
+
exit 1
|
169
|
+
end
|
170
|
+
|
171
|
+
puts "Bulk transforming markdown files in #{docs_path}..." if merged_options[:verbose]
|
172
|
+
puts "Using suffix: #{merged_options[:suffix]}" if merged_options[:verbose]
|
173
|
+
puts "Excludes: #{merged_options[:excludes].join(', ')}" if merged_options[:verbose] && !merged_options[:excludes].empty?
|
174
|
+
|
175
|
+
begin
|
176
|
+
transformed_files = LlmsTxt.bulk_transform(docs_path, merged_options)
|
177
|
+
|
178
|
+
if transformed_files.empty?
|
179
|
+
puts "No markdown files found to transform"
|
180
|
+
else
|
181
|
+
puts "Successfully transformed #{transformed_files.size} files:"
|
182
|
+
transformed_files.each { |file| puts " #{file}" } unless merged_options[:verbose] # verbose mode already shows progress
|
183
|
+
end
|
184
|
+
rescue LlmsTxt::Errors::BaseError => e
|
185
|
+
puts "Error during bulk transformation: #{e.message}"
|
186
|
+
exit 1
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def parse(options)
|
191
|
+
file_path = options[:file_path] || 'llms.txt'
|
192
|
+
|
193
|
+
unless File.exist?(file_path)
|
194
|
+
puts "File not found: #{file_path}"
|
195
|
+
exit 1
|
196
|
+
end
|
197
|
+
|
198
|
+
parsed = LlmsTxt.parse(file_path)
|
199
|
+
|
200
|
+
if options[:verbose]
|
201
|
+
puts "Title: #{parsed.title}"
|
202
|
+
puts "Description: #{parsed.description}"
|
203
|
+
puts "Documentation Links: #{parsed.documentation_links.size}"
|
204
|
+
puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
|
205
|
+
puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
|
206
|
+
else
|
207
|
+
puts parsed.to_xml if parsed.respond_to?(:to_xml)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
def validate(options)
|
212
|
+
file_path = options[:file_path] || 'llms.txt'
|
213
|
+
|
214
|
+
unless File.exist?(file_path)
|
215
|
+
puts "File not found: #{file_path}"
|
216
|
+
exit 1
|
217
|
+
end
|
218
|
+
|
219
|
+
content = File.read(file_path)
|
220
|
+
valid = LlmsTxt.validate(content)
|
221
|
+
|
222
|
+
if valid
|
223
|
+
puts 'Valid llms.txt file'
|
224
|
+
else
|
225
|
+
puts 'Invalid llms.txt file'
|
226
|
+
puts "\nErrors:"
|
227
|
+
LlmsTxt::Validator.new(content).errors.each do |error|
|
228
|
+
puts " - #{error}"
|
229
|
+
end
|
230
|
+
exit 1
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
def show_version
|
235
|
+
puts "llms-txt version #{LlmsTxt::VERSION}"
|
236
|
+
end
|
237
|
+
end
|
238
|
+
end
|
data/lib/llms_txt/version.rb
CHANGED
data/lib/llms_txt.rb
CHANGED
data/llms-txt-ruby.gemspec
CHANGED
@@ -8,22 +8,24 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ['Maciej Mensfeld']
|
9
9
|
spec.email = %w[maciej@mensfeld.pl]
|
10
10
|
|
11
|
-
spec.summary = '
|
11
|
+
spec.summary = 'Generate llms.txt files and transform markdown documentation to be AI-friendly'
|
12
12
|
spec.description = <<~DESC
|
13
|
-
A
|
14
|
-
|
15
|
-
documentation directories
|
16
|
-
links and
|
13
|
+
A Ruby tool for transforming existing markdown documentation into AI-friendly formats
|
14
|
+
following the llms.txt standard. Features include: generating llms.txt files from
|
15
|
+
documentation directories with automatic file prioritization, transforming individual
|
16
|
+
markdown files by expanding relative links to absolute URLs, and bulk transforming entire
|
17
|
+
documentation trees with customizable exclusion patterns. Provides both CLI and Ruby API
|
18
|
+
with configuration file support.
|
17
19
|
DESC
|
18
20
|
|
19
21
|
spec.homepage = 'https://github.com/mensfeld/llms-txt-ruby'
|
20
22
|
spec.license = 'MIT'
|
21
|
-
spec.required_ruby_version = '>= 3.
|
23
|
+
spec.required_ruby_version = '>= 3.2'
|
22
24
|
|
23
25
|
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
24
26
|
spec.metadata['homepage_uri'] = spec.homepage
|
25
27
|
spec.metadata['source_code_uri'] = 'https://github.com/mensfeld/llms-txt-ruby'
|
26
|
-
spec.metadata['changelog_uri'] = 'https://github.com/mensfeld/llms-txt-ruby/blob/
|
28
|
+
spec.metadata['changelog_uri'] = 'https://github.com/mensfeld/llms-txt-ruby/blob/master/CHANGELOG.md'
|
27
29
|
spec.metadata['documentation_uri'] = 'https://github.com/mensfeld/llms-txt-ruby'
|
28
30
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
29
31
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llms-txt-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -94,10 +94,12 @@ dependencies:
|
|
94
94
|
- !ruby/object:Gem::Version
|
95
95
|
version: '0.21'
|
96
96
|
description: |
|
97
|
-
A
|
98
|
-
|
99
|
-
documentation directories
|
100
|
-
links and
|
97
|
+
A Ruby tool for transforming existing markdown documentation into AI-friendly formats
|
98
|
+
following the llms.txt standard. Features include: generating llms.txt files from
|
99
|
+
documentation directories with automatic file prioritization, transforming individual
|
100
|
+
markdown files by expanding relative links to absolute URLs, and bulk transforming entire
|
101
|
+
documentation trees with customizable exclusion patterns. Provides both CLI and Ruby API
|
102
|
+
with configuration file support.
|
101
103
|
email:
|
102
104
|
- maciej@mensfeld.pl
|
103
105
|
executables:
|
@@ -110,6 +112,7 @@ files:
|
|
110
112
|
- ".gitignore"
|
111
113
|
- ".rubocop.yml"
|
112
114
|
- ".ruby-version"
|
115
|
+
- CHANGELOG.md
|
113
116
|
- Gemfile
|
114
117
|
- Gemfile.lock
|
115
118
|
- LICENSE
|
@@ -118,6 +121,7 @@ files:
|
|
118
121
|
- bin/llms-txt
|
119
122
|
- lib/llms_txt.rb
|
120
123
|
- lib/llms_txt/bulk_transformer.rb
|
124
|
+
- lib/llms_txt/cli.rb
|
121
125
|
- lib/llms_txt/config.rb
|
122
126
|
- lib/llms_txt/errors.rb
|
123
127
|
- lib/llms_txt/generator.rb
|
@@ -127,7 +131,6 @@ files:
|
|
127
131
|
- lib/llms_txt/version.rb
|
128
132
|
- llms-txt-ruby.gemspec
|
129
133
|
- llms-txt.yml.example
|
130
|
-
- mise.toml
|
131
134
|
- renovate.json
|
132
135
|
homepage: https://github.com/mensfeld/llms-txt-ruby
|
133
136
|
licenses:
|
@@ -136,7 +139,7 @@ metadata:
|
|
136
139
|
allowed_push_host: https://rubygems.org
|
137
140
|
homepage_uri: https://github.com/mensfeld/llms-txt-ruby
|
138
141
|
source_code_uri: https://github.com/mensfeld/llms-txt-ruby
|
139
|
-
changelog_uri: https://github.com/mensfeld/llms-txt-ruby/blob/
|
142
|
+
changelog_uri: https://github.com/mensfeld/llms-txt-ruby/blob/master/CHANGELOG.md
|
140
143
|
documentation_uri: https://github.com/mensfeld/llms-txt-ruby
|
141
144
|
rubygems_mfa_required: 'true'
|
142
145
|
rdoc_options: []
|
@@ -146,7 +149,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
149
|
requirements:
|
147
150
|
- - ">="
|
148
151
|
- !ruby/object:Gem::Version
|
149
|
-
version: 3.
|
152
|
+
version: '3.2'
|
150
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
154
|
requirements:
|
152
155
|
- - ">="
|
@@ -155,5 +158,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
158
|
requirements: []
|
156
159
|
rubygems_version: 3.6.9
|
157
160
|
specification_version: 4
|
158
|
-
summary:
|
161
|
+
summary: Generate llms.txt files and transform markdown documentation to be AI-friendly
|
159
162
|
test_files: []
|
data/mise.toml
DELETED