llms-txt-ruby 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +15 -0
- data/Gemfile.lock +1 -1
- data/README.md +163 -0
- data/lib/llms_txt/version.rb +1 -1
- data/llms-txt-ruby.gemspec +9 -7
- metadata +11 -9
- data/mise.toml +0 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59e6ef21e4d8e7cfad82d91a1d0c6eede9528850622efef8738b718bbbe0ea43
|
4
|
+
data.tar.gz: 0247ceaaed63bea7ba5d86753cd77e19e032ec31561ab6fbfe35874d875cbda6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13bcd98389bbfd2193e1d21b3b5372bd1656145aec325a0f1456ce95cb699106008c35914cdab9c6ed4e7cefa4c955791347179e003def2a8569ab267c387abc
|
7
|
+
data.tar.gz: 4115db81602c0137224f7e5c8b9c536b5684776c3398e04893d6f1e0b2a327af71918d34c661d83eae9e773654ef282947fcd231af0a3c4cfe8263f085455381
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.4.
|
1
|
+
3.4.6
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## 0.1.1 (2025-10-07)
|
4
|
+
- [Change] Updated repository metadata to use `master` branch instead of `main`.
|
5
|
+
|
6
|
+
## 0.1.0 (2025-10-07)
|
7
|
+
- [Feature] Generate `llms.txt` files from markdown documentation.
|
8
|
+
- [Feature] Transform individual markdown files to be AI-friendly.
|
9
|
+
- [Feature] Bulk transformation of entire documentation directories.
|
10
|
+
- [Feature] CLI with commands: `generate`, `transform`, `bulk-transform`, `parse`, `validate`.
|
11
|
+
- [Feature] Configuration file support (`llms-txt.yml`).
|
12
|
+
- [Feature] Automatic link expansion from relative to absolute URLs.
|
13
|
+
- [Feature] File prioritization (README first, then guides, APIs, etc.).
|
14
|
+
- [Feature] Exclusion patterns for bulk transformations.
|
15
|
+
- [Feature] Ruby API for programmatic usage.
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -213,6 +213,169 @@ wiki/
|
|
213
213
|
└── internal.md ← Excluded, no .llm.md version
|
214
214
|
```
|
215
215
|
|
216
|
+
## Serving LLM-Friendly Documentation
|
217
|
+
|
218
|
+
After using `bulk-transform` to create `.llm.md` versions of your documentation, you can configure your web server to automatically serve these LLM-optimized versions to AI bots while showing the original versions to human visitors.
|
219
|
+
|
220
|
+
### How It Works
|
221
|
+
|
222
|
+
The strategy is simple:
|
223
|
+
|
224
|
+
1. **Detect AI bots** by their User-Agent strings
|
225
|
+
2. **Serve `.llm.md` files** to detected AI bots
|
226
|
+
3. **Serve original `.md` files** to human visitors
|
227
|
+
4. **Automatic selection** - no manual switching needed
|
228
|
+
|
229
|
+
### Apache Configuration
|
230
|
+
|
231
|
+
Add this to your `.htaccess` file:
|
232
|
+
|
233
|
+
```apache
|
234
|
+
# Detect LLM bots by User-Agent
|
235
|
+
SetEnvIf User-Agent "(?i)(openai|anthropic|claude|gpt|chatgpt|bard|gemini|copilot)" IS_LLM_BOT
|
236
|
+
SetEnvIf User-Agent "(?i)(perplexity|character\.ai|you\.com|poe\.com|huggingface|replicate)" IS_LLM_BOT
|
237
|
+
SetEnvIf User-Agent "(?i)(langchain|llamaindex|semantic|embedding|vector|rag)" IS_LLM_BOT
|
238
|
+
SetEnvIf User-Agent "(?i)(ollama|mistral|cohere|together|fireworks|groq)" IS_LLM_BOT
|
239
|
+
|
240
|
+
# Serve .md files as text/plain
|
241
|
+
<FilesMatch "\.md$">
|
242
|
+
Header set Content-Type "text/plain; charset=utf-8"
|
243
|
+
ForceType text/plain
|
244
|
+
</FilesMatch>
|
245
|
+
|
246
|
+
# Enable rewrite engine
|
247
|
+
RewriteEngine On
|
248
|
+
|
249
|
+
# For LLM bots: rewrite requests to serve .llm.md versions
|
250
|
+
RewriteCond %{ENV:IS_LLM_BOT} !^$
|
251
|
+
RewriteCond %{REQUEST_URI} ^/docs/.*\.md$ [NC]
|
252
|
+
RewriteCond %{REQUEST_URI} !\.llm\.md$ [NC]
|
253
|
+
RewriteCond %{DOCUMENT_ROOT}%{REQUEST_URI} -f
|
254
|
+
RewriteRule ^(.*)\.md$ $1.llm.md [L]
|
255
|
+
|
256
|
+
# For LLM bots: handle clean URLs by appending .llm.md
|
257
|
+
RewriteCond %{ENV:IS_LLM_BOT} !^$
|
258
|
+
RewriteCond %{REQUEST_URI} ^/docs/ [NC]
|
259
|
+
RewriteCond %{REQUEST_URI} !\.md$ [NC]
|
260
|
+
RewriteCond %{DOCUMENT_ROOT}%{REQUEST_URI}.llm.md -f
|
261
|
+
RewriteRule ^(.*)$ $1.llm.md [L]
|
262
|
+
|
263
|
+
# For regular users: serve original .md files or clean URLs as usual
|
264
|
+
# (add your normal URL handling rules here)
|
265
|
+
```
|
266
|
+
|
267
|
+
### Nginx Configuration
|
268
|
+
|
269
|
+
Add this to your nginx server block:
|
270
|
+
|
271
|
+
```nginx
|
272
|
+
# Map to detect LLM bots
|
273
|
+
map $http_user_agent $is_llm_bot {
|
274
|
+
default 0;
|
275
|
+
"~*(?i)(openai|anthropic|claude|gpt|chatgpt|bard|gemini|copilot)" 1;
|
276
|
+
"~*(?i)(perplexity|character\.ai|you\.com|poe\.com|huggingface|replicate)" 1;
|
277
|
+
"~*(?i)(langchain|llamaindex|semantic|embedding|vector|rag)" 1;
|
278
|
+
"~*(?i)(ollama|mistral|cohere|together|fireworks|groq)" 1;
|
279
|
+
}
|
280
|
+
|
281
|
+
server {
|
282
|
+
# ... your server configuration ...
|
283
|
+
|
284
|
+
# Serve .md files as text/plain
|
285
|
+
location ~ \.md$ {
|
286
|
+
default_type text/plain;
|
287
|
+
charset utf-8;
|
288
|
+
}
|
289
|
+
|
290
|
+
# For LLM bots requesting .md files, serve .llm.md version
|
291
|
+
location ~ ^/docs/(.*)\.md$ {
|
292
|
+
if ($is_llm_bot) {
|
293
|
+
rewrite ^(.*)\.md$ $1.llm.md last;
|
294
|
+
}
|
295
|
+
try_files $uri $uri/ =404;
|
296
|
+
}
|
297
|
+
|
298
|
+
# For LLM bots requesting clean URLs, serve .llm.md version
|
299
|
+
location ~ ^/docs/ {
|
300
|
+
if ($is_llm_bot) {
|
301
|
+
try_files $uri.llm.md $uri $uri/ =404;
|
302
|
+
}
|
303
|
+
try_files $uri $uri.md $uri/ =404;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
```
|
307
|
+
|
308
|
+
### Cloudflare Workers
|
309
|
+
|
310
|
+
For serverless deployments, use Cloudflare Workers:
|
311
|
+
|
312
|
+
```javascript
|
313
|
+
export default {
|
314
|
+
async fetch(request) {
|
315
|
+
const url = new URL(request.url);
|
316
|
+
const userAgent = request.headers.get('user-agent') || '';
|
317
|
+
|
318
|
+
// Detect LLM bots
|
319
|
+
const llmBotPatterns = [
|
320
|
+
/openai|anthropic|claude|gpt|chatgpt|bard|gemini|copilot/i,
|
321
|
+
/perplexity|character\.ai|you\.com|poe\.com|huggingface|replicate/i,
|
322
|
+
/langchain|llamaindex|semantic|embedding|vector|rag/i,
|
323
|
+
/ollama|mistral|cohere|together|fireworks|groq/i
|
324
|
+
];
|
325
|
+
|
326
|
+
const isLLMBot = llmBotPatterns.some(pattern => pattern.test(userAgent));
|
327
|
+
|
328
|
+
// If LLM bot and requesting docs
|
329
|
+
if (isLLMBot && url.pathname.startsWith('/docs/')) {
|
330
|
+
// Try to serve .llm.md version
|
331
|
+
const llmPath = url.pathname.replace(/\.md$/, '.llm.md');
|
332
|
+
if (!url.pathname.endsWith('.llm.md')) {
|
333
|
+
url.pathname = llmPath;
|
334
|
+
}
|
335
|
+
}
|
336
|
+
|
337
|
+
return fetch(url);
|
338
|
+
}
|
339
|
+
}
|
340
|
+
```
|
341
|
+
|
342
|
+
### Custom Suffix
|
343
|
+
|
344
|
+
If you used a different suffix with the `bulk-transform` command (e.g., `--suffix .ai`), update your web server rules accordingly.
|
345
|
+
|
346
|
+
**Apache:**
|
347
|
+
```apache
|
348
|
+
RewriteRule ^(.*)\.md$ $1.ai.md [L]
|
349
|
+
```
|
350
|
+
|
351
|
+
**Nginx:**
|
352
|
+
```nginx
|
353
|
+
rewrite ^(.*)\.md$ $1.ai.md last;
|
354
|
+
```
|
355
|
+
|
356
|
+
**Cloudflare Workers:**
|
357
|
+
```javascript
|
358
|
+
const llmPath = url.pathname.replace(/\.md$/, '.ai.md');
|
359
|
+
```
|
360
|
+
|
361
|
+
### Example Setup
|
362
|
+
|
363
|
+
```yaml
|
364
|
+
# llms-txt.yml
|
365
|
+
docs: ./docs
|
366
|
+
base_url: https://myproject.io
|
367
|
+
suffix: .llm
|
368
|
+
convert_urls: true
|
369
|
+
```
|
370
|
+
|
371
|
+
```bash
|
372
|
+
# Generate LLM-friendly versions
|
373
|
+
llms-txt bulk-transform --config llms-txt.yml
|
374
|
+
|
375
|
+
# Deploy both original and .llm.md files to your web server
|
376
|
+
# The server will automatically serve the right version to each visitor
|
377
|
+
```
|
378
|
+
|
216
379
|
## Ruby API
|
217
380
|
|
218
381
|
### Basic Usage
|
data/lib/llms_txt/version.rb
CHANGED
data/llms-txt-ruby.gemspec
CHANGED
@@ -8,22 +8,24 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.authors = ['Maciej Mensfeld']
|
9
9
|
spec.email = %w[maciej@mensfeld.pl]
|
10
10
|
|
11
|
-
spec.summary = '
|
11
|
+
spec.summary = 'Generate llms.txt files and transform markdown documentation to be AI-friendly'
|
12
12
|
spec.description = <<~DESC
|
13
|
-
A
|
14
|
-
|
15
|
-
documentation directories
|
16
|
-
links and
|
13
|
+
A Ruby tool for transforming existing markdown documentation into AI-friendly formats
|
14
|
+
following the llms.txt standard. Features include: generating llms.txt files from
|
15
|
+
documentation directories with automatic file prioritization, transforming individual
|
16
|
+
markdown files by expanding relative links to absolute URLs, and bulk transforming entire
|
17
|
+
documentation trees with customizable exclusion patterns. Provides both CLI and Ruby API
|
18
|
+
with configuration file support.
|
17
19
|
DESC
|
18
20
|
|
19
21
|
spec.homepage = 'https://github.com/mensfeld/llms-txt-ruby'
|
20
22
|
spec.license = 'MIT'
|
21
|
-
spec.required_ruby_version = '>= 3.
|
23
|
+
spec.required_ruby_version = '>= 3.2'
|
22
24
|
|
23
25
|
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
24
26
|
spec.metadata['homepage_uri'] = spec.homepage
|
25
27
|
spec.metadata['source_code_uri'] = 'https://github.com/mensfeld/llms-txt-ruby'
|
26
|
-
spec.metadata['changelog_uri'] = 'https://github.com/mensfeld/llms-txt-ruby/blob/main/CHANGELOG.md'
|
28
|
+
spec.metadata['changelog_uri'] = 'https://github.com/mensfeld/llms-txt-ruby/blob/master/CHANGELOG.md'
|
27
29
|
spec.metadata['documentation_uri'] = 'https://github.com/mensfeld/llms-txt-ruby'
|
28
30
|
spec.metadata['rubygems_mfa_required'] = 'true'
|
29
31
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llms-txt-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -94,10 +94,12 @@ dependencies:
|
|
94
94
|
- !ruby/object:Gem::Version
|
95
95
|
version: '0.21'
|
96
96
|
description: |
|
97
|
-
A
|
98
|
-
|
99
|
-
documentation directories
|
100
|
-
links and
|
97
|
+
A Ruby tool for transforming existing markdown documentation into AI-friendly formats
|
98
|
+
following the llms.txt standard. Features include: generating llms.txt files from
|
99
|
+
documentation directories with automatic file prioritization, transforming individual
|
100
|
+
markdown files by expanding relative links to absolute URLs, and bulk transforming entire
|
101
|
+
documentation trees with customizable exclusion patterns. Provides both CLI and Ruby API
|
102
|
+
with configuration file support.
|
101
103
|
email:
|
102
104
|
- maciej@mensfeld.pl
|
103
105
|
executables:
|
@@ -110,6 +112,7 @@ files:
|
|
110
112
|
- ".gitignore"
|
111
113
|
- ".rubocop.yml"
|
112
114
|
- ".ruby-version"
|
115
|
+
- CHANGELOG.md
|
113
116
|
- Gemfile
|
114
117
|
- Gemfile.lock
|
115
118
|
- LICENSE
|
@@ -127,7 +130,6 @@ files:
|
|
127
130
|
- lib/llms_txt/version.rb
|
128
131
|
- llms-txt-ruby.gemspec
|
129
132
|
- llms-txt.yml.example
|
130
|
-
- mise.toml
|
131
133
|
- renovate.json
|
132
134
|
homepage: https://github.com/mensfeld/llms-txt-ruby
|
133
135
|
licenses:
|
@@ -136,7 +138,7 @@ metadata:
|
|
136
138
|
allowed_push_host: https://rubygems.org
|
137
139
|
homepage_uri: https://github.com/mensfeld/llms-txt-ruby
|
138
140
|
source_code_uri: https://github.com/mensfeld/llms-txt-ruby
|
139
|
-
changelog_uri: https://github.com/mensfeld/llms-txt-ruby/blob/main/CHANGELOG.md
|
141
|
+
changelog_uri: https://github.com/mensfeld/llms-txt-ruby/blob/master/CHANGELOG.md
|
140
142
|
documentation_uri: https://github.com/mensfeld/llms-txt-ruby
|
141
143
|
rubygems_mfa_required: 'true'
|
142
144
|
rdoc_options: []
|
@@ -146,7 +148,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
148
|
requirements:
|
147
149
|
- - ">="
|
148
150
|
- !ruby/object:Gem::Version
|
149
|
-
version: 3.
|
151
|
+
version: '3.2'
|
150
152
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
153
|
requirements:
|
152
154
|
- - ">="
|
@@ -155,5 +157,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
157
|
requirements: []
|
156
158
|
rubygems_version: 3.6.9
|
157
159
|
specification_version: 4
|
158
|
-
summary:
|
160
|
+
summary: Generate llms.txt files and transform markdown documentation to be AI-friendly
|
159
161
|
test_files: []
|
data/mise.toml
DELETED