llms-txt-ruby 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/.github/workflows/push.yml +1 -1
- data/.rubocop.yml +53 -6
- data/.ruby-version +1 -1
- data/CHANGELOG.md +18 -0
- data/Gemfile.lock +1 -1
- data/README.md +153 -33
- data/bin/rspecs +7 -0
- data/lib/llms_txt/bulk_transformer.rb +1 -3
- data/lib/llms_txt/cli.rb +114 -18
- data/lib/llms_txt/config.rb +5 -2
- data/lib/llms_txt/generator.rb +17 -17
- data/lib/llms_txt/markdown_transformer.rb +2 -2
- data/lib/llms_txt/validator.rb +5 -11
- data/lib/llms_txt/version.rb +1 -1
- data/lib/llms_txt.rb +0 -1
- data/renovate.json +7 -4
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '091a1f41ec541d6f747df9f11f06f09c59b2f0c2829ae9a0a4fae5624cb551ef'
|
4
|
+
data.tar.gz: f93506e0ff2326c1957affb5211de3f5a6ae022da75308b2274ddebac8d3560c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fd566dd98d2fc189de5e4553ccb613c2481569040ae3ae6468e89a95fef0c1d3a80a9bd8f81ecedcbdc306d773bbd2dde1883a0ee66878b70d82332a21aac19
|
7
|
+
data.tar.gz: f8e0eb6c2c46c14ab4cc5fb56a99953d0771407188deb5d2c8138992d5d729cebc3250571d10f8e694033b7b4ab4dbc82de1a09cb66f5fa8db563d960a479068
|
data/.github/workflows/ci.yml
CHANGED
data/.github/workflows/push.yml
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
AllCops:
|
2
|
-
TargetRubyVersion: 3.
|
2
|
+
TargetRubyVersion: 3.2
|
3
3
|
NewCops: enable
|
4
|
+
SuggestExtensions: false
|
4
5
|
|
5
6
|
Style/Documentation:
|
6
7
|
Enabled: false
|
@@ -10,18 +11,64 @@ Style/StringLiterals:
|
|
10
11
|
|
11
12
|
Layout/LineLength:
|
12
13
|
Max: 120
|
14
|
+
Exclude:
|
15
|
+
- 'lib/llms_txt/cli.rb'
|
13
16
|
|
14
17
|
Metrics/ClassLength:
|
15
|
-
Max:
|
18
|
+
Max: 200
|
19
|
+
Exclude:
|
20
|
+
- 'lib/llms_txt/cli.rb'
|
16
21
|
|
17
22
|
Metrics/MethodLength:
|
18
|
-
Max:
|
23
|
+
Max: 35
|
24
|
+
Exclude:
|
25
|
+
- 'lib/llms_txt/cli.rb'
|
19
26
|
|
20
27
|
Metrics/AbcSize:
|
21
|
-
Max:
|
28
|
+
Max: 40
|
29
|
+
Exclude:
|
30
|
+
- 'lib/llms_txt/cli.rb'
|
22
31
|
|
23
32
|
Metrics/CyclomaticComplexity:
|
24
|
-
Max:
|
33
|
+
Max: 15
|
34
|
+
Exclude:
|
35
|
+
- 'lib/llms_txt/config.rb'
|
36
|
+
|
37
|
+
Metrics/PerceivedComplexity:
|
38
|
+
Max: 15
|
39
|
+
Exclude:
|
40
|
+
- 'lib/llms_txt/config.rb'
|
41
|
+
|
42
|
+
Metrics/BlockLength:
|
43
|
+
Exclude:
|
44
|
+
- 'spec/**/*'
|
45
|
+
- 'lib/llms_txt/cli.rb'
|
46
|
+
- '*.gemspec'
|
25
47
|
|
26
48
|
Style/FrozenStringLiteralComment:
|
27
|
-
Enabled: true
|
49
|
+
Enabled: true
|
50
|
+
|
51
|
+
# Specs often have multiline block chains
|
52
|
+
Style/MultilineBlockChain:
|
53
|
+
Exclude:
|
54
|
+
- 'spec/**/*'
|
55
|
+
|
56
|
+
# Disable predicate method naming rule
|
57
|
+
Naming/PredicateMethod:
|
58
|
+
Enabled: false
|
59
|
+
|
60
|
+
# Allow development dependencies in gemspec
|
61
|
+
Gemspec/DevelopmentDependencies:
|
62
|
+
Enabled: false
|
63
|
+
|
64
|
+
# Enforce first argument on new line for multiline method calls
|
65
|
+
Layout/FirstMethodArgumentLineBreak:
|
66
|
+
Enabled: true
|
67
|
+
|
68
|
+
# Use fixed indentation for arguments
|
69
|
+
Layout/ArgumentAlignment:
|
70
|
+
EnforcedStyle: with_fixed_indentation
|
71
|
+
|
72
|
+
# Ensure closing parenthesis on new line for multiline calls
|
73
|
+
Layout/MultilineMethodCallBraceLayout:
|
74
|
+
EnforcedStyle: new_line
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.4.
|
1
|
+
3.4.7
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.2.0 (2025-10-07)
|
4
|
+
- [Breaking] Removed positional argument support for all CLI commands. All file paths must now be specified using flags:
|
5
|
+
- `transform`: use `-d/--docs` flag instead of positional argument
|
6
|
+
- `parse`: use `-d/--docs` flag instead of positional argument (defaults to `llms.txt` if not specified)
|
7
|
+
- `validate`: use `-d/--docs` flag instead of positional argument (defaults to `llms.txt` if not specified)
|
8
|
+
- [Enhancement] Improved CLI consistency by requiring explicit flags for all file paths.
|
9
|
+
- [Enhancement] Added comprehensive CLI integration tests in `spec/integrations/` directory.
|
10
|
+
- Each command has its own dedicated integration test file
|
11
|
+
- Tests verify actual CLI binary execution, not just Ruby API
|
12
|
+
- All tests (unit and integration) run together with `bin/rspecs`
|
13
|
+
- [Enhancement] Added convenient test runner script `bin/rspecs` for running all tests.
|
14
|
+
- [Enhancement] Added comprehensive YARD documentation to all CLI methods.
|
15
|
+
- [Enhancement] Resolved all RuboCop offenses (0 offenses detected).
|
16
|
+
- [Fix] Fixed validator bug where `each_value` was incorrectly called on Array.
|
17
|
+
|
18
|
+
## 0.1.3 (2025-10-07)
|
19
|
+
- [Fix] Fixed `transform` command to accept file path from `-d/--docs` flag in addition to positional arguments.
|
20
|
+
|
3
21
|
## 0.1.2 (2025-10-07)
|
4
22
|
- [Fix] Fixed CLI error handling to use correct `LlmsTxt::Errors::BaseError` instead of non-existent `LlmsTxt::Error`.
|
5
23
|
- [Enhancement] Extracted CLI class to `lib/llms_txt/cli.rb` for better testability.
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -24,7 +24,7 @@ This library converts existing human-first documentation into LLM-friendly forma
|
|
24
24
|
AI-optimized format by expanding relative links to absolute URLs and normalizing link
|
25
25
|
structures
|
26
26
|
3. **Bulk transforms** - Processes all markdown files in a directory recursively, creating
|
27
|
-
LLM-friendly versions alongside originals with customizable exclusion patterns
|
27
|
+
LLM-friendly versions alongside originals (or transforming in-place) with customizable exclusion patterns
|
28
28
|
|
29
29
|
## Installation
|
30
30
|
|
@@ -76,7 +76,7 @@ llms-txt generate
|
|
76
76
|
llms-txt generate --docs ./docs
|
77
77
|
|
78
78
|
# Transform a single file
|
79
|
-
llms-txt transform README.md
|
79
|
+
llms-txt transform --docs README.md
|
80
80
|
|
81
81
|
# Transform all markdown files in directory
|
82
82
|
llms-txt bulk-transform --docs ./docs
|
@@ -91,10 +91,10 @@ llms-txt generate --config my-config.yml
|
|
91
91
|
|
92
92
|
```bash
|
93
93
|
llms-txt generate [options] # Generate llms.txt from documentation (default)
|
94
|
-
llms-txt transform [
|
94
|
+
llms-txt transform [options] # Transform a markdown file to be AI-friendly
|
95
95
|
llms-txt bulk-transform [options] # Transform all markdown files in directory
|
96
|
-
llms-txt parse [
|
97
|
-
llms-txt validate [
|
96
|
+
llms-txt parse [options] # Parse existing llms.txt file
|
97
|
+
llms-txt validate [options] # Validate llms.txt file
|
98
98
|
llms-txt version # Show version
|
99
99
|
```
|
100
100
|
|
@@ -108,7 +108,7 @@ llms-txt version # Show version
|
|
108
108
|
-h, --help Show help message
|
109
109
|
```
|
110
110
|
|
111
|
-
*For advanced options like base_url, title, description, and convert_urls, use a config file.*
|
111
|
+
*For advanced options like base_url, title, description, suffix, excludes, and convert_urls, use a config file.*
|
112
112
|
|
113
113
|
## Configuration File
|
114
114
|
|
@@ -137,7 +137,13 @@ output: llms.txt
|
|
137
137
|
|
138
138
|
# Transformation options (optional)
|
139
139
|
convert_urls: true # Convert .html links to .md
|
140
|
+
suffix: .llm # Suffix for transformed files (use "" for in-place)
|
140
141
|
verbose: false # Enable verbose output
|
142
|
+
|
143
|
+
# Exclusion patterns (optional)
|
144
|
+
excludes:
|
145
|
+
- "**/private/**"
|
146
|
+
- "**/drafts/**"
|
141
147
|
```
|
142
148
|
|
143
149
|
The config file will be automatically found if named:
|
@@ -145,28 +151,136 @@ The config file will be automatically found if named:
|
|
145
151
|
- `llms-txt.yaml`
|
146
152
|
- `.llms-txt.yml`
|
147
153
|
|
154
|
+
### Configuration Options Reference
|
155
|
+
|
156
|
+
| Option | Type | Default | Description |
|
157
|
+
|--------|------|---------|-------------|
|
158
|
+
| `docs` | String | `./docs` | Directory containing markdown files to process |
|
159
|
+
| `base_url` | String | - | Base URL for expanding relative links (e.g., `https://myproject.io`) |
|
160
|
+
| `title` | String | Auto-detected | Project title for llms.txt generation |
|
161
|
+
| `description` | String | Auto-detected | Project description for llms.txt generation |
|
162
|
+
| `output` | String | `llms.txt` | Output filename for generated llms.txt |
|
163
|
+
| `convert_urls` | Boolean | `false` | Convert HTML URLs to markdown format (`.html` → `.md`) |
|
164
|
+
| `suffix` | String | `.llm` | Suffix added to transformed files. Use `""` for in-place transformation |
|
165
|
+
| `excludes` | Array | `[]` | Glob patterns for files/directories to exclude from processing |
|
166
|
+
| `verbose` | Boolean | `false` | Enable detailed output during processing |
|
167
|
+
|
148
168
|
## Bulk Transformation
|
149
169
|
|
150
170
|
The `bulk-transform` command processes all markdown files in a directory recursively, creating
|
151
|
-
AI-friendly versions
|
152
|
-
documentation trees.
|
171
|
+
AI-friendly versions. By default, it creates new files with a `.llm.md` suffix, but you can also transform files in-place for build pipelines.
|
153
172
|
|
154
173
|
### Key Features
|
155
174
|
|
156
175
|
- **Recursive processing** - Finds and transforms all `.md` files in nested directories
|
157
176
|
- **Preserves structure** - Maintains your existing directory layout
|
158
177
|
- **Exclusion patterns** - Skip files/directories using glob patterns
|
159
|
-
- **Custom suffixes** - Choose how transformed files are named
|
178
|
+
- **Custom suffixes** - Choose how transformed files are named, or transform in-place
|
160
179
|
- **LLM optimizations** - Expands relative links, converts HTML URLs, etc.
|
161
180
|
|
162
|
-
###
|
181
|
+
### Default Behavior: Creating Separate Files
|
182
|
+
|
183
|
+
By default, `bulk-transform` creates new `.llm.md` files alongside your originals:
|
184
|
+
|
185
|
+
```yaml
|
186
|
+
# llms-txt.yml
|
187
|
+
docs: ./docs
|
188
|
+
base_url: https://myproject.io
|
189
|
+
suffix: .llm # Creates .llm.md files (default if omitted)
|
190
|
+
convert_urls: true
|
191
|
+
```
|
192
|
+
|
193
|
+
```bash
|
194
|
+
llms-txt bulk-transform --config llms-txt.yml
|
195
|
+
```
|
196
|
+
|
197
|
+
**Result:**
|
198
|
+
```
|
199
|
+
docs/
|
200
|
+
├── README.md
|
201
|
+
├── README.llm.md ← AI-friendly version
|
202
|
+
├── setup.md
|
203
|
+
└── setup.llm.md ← AI-friendly version
|
204
|
+
```
|
205
|
+
|
206
|
+
This preserves your original files and creates LLM-optimized versions separately.
|
207
|
+
|
208
|
+
### In-Place Transformation
|
209
|
+
|
210
|
+
For build pipelines where you want to transform documentation directly without maintaining separate copies, use `suffix: ""`:
|
211
|
+
|
212
|
+
```yaml
|
213
|
+
# llms-txt.yml
|
214
|
+
docs: ./docs
|
215
|
+
base_url: https://myproject.io
|
216
|
+
convert_urls: true
|
217
|
+
suffix: "" # Transform in-place, no separate files
|
218
|
+
excludes:
|
219
|
+
- "**/private/**"
|
220
|
+
- "**/drafts/**"
|
221
|
+
```
|
222
|
+
|
223
|
+
```bash
|
224
|
+
llms-txt bulk-transform --config llms-txt.yml
|
225
|
+
```
|
226
|
+
|
227
|
+
**Before transformation** (`docs/setup.md`):
|
228
|
+
```markdown
|
229
|
+
See the [configuration guide](../config.md) for details.
|
230
|
+
Visit our [API docs](https://myproject.io/api/).
|
231
|
+
```
|
232
|
+
|
233
|
+
**After transformation** (`docs/setup.md` - same file, overwritten):
|
234
|
+
```markdown
|
235
|
+
See the [configuration guide](https://myproject.io/docs/config.md) for details.
|
236
|
+
Visit our [API docs](https://myproject.io/api.md).
|
237
|
+
```
|
238
|
+
|
239
|
+
This is perfect for:
|
240
|
+
- **Build pipelines** - Transform docs as part of your deployment process
|
241
|
+
- **Static site generators** - Process markdown before building HTML
|
242
|
+
- **CI/CD workflows** - Automated documentation transformation
|
243
|
+
|
244
|
+
### Real-World Example: Karafka Framework
|
245
|
+
|
246
|
+
The [Karafka framework](https://github.com/karafka/website) uses in-place transformation in its documentation build process. Previously, it had 140+ lines of custom Ruby code for link expansion and URL conversion. Now it uses:
|
247
|
+
|
248
|
+
```yaml
|
249
|
+
# llms-txt.yml
|
250
|
+
docs: ./online/docs
|
251
|
+
base_url: https://karafka.io/docs
|
252
|
+
convert_urls: true
|
253
|
+
suffix: ""
|
254
|
+
excludes:
|
255
|
+
- "**/Enterprise-License-Setup/**"
|
256
|
+
```
|
163
257
|
|
164
258
|
```bash
|
165
|
-
#
|
259
|
+
# In their build script (sync.rb)
|
260
|
+
system!("llms-txt bulk-transform --config llms-txt.yml")
|
261
|
+
```
|
262
|
+
|
263
|
+
This configuration:
|
264
|
+
- Processes all markdown files recursively in `./online/docs`
|
265
|
+
- Expands relative links to absolute URLs using the base_url
|
266
|
+
- Converts HTML URLs to markdown format (`.html` → `.md`)
|
267
|
+
- Transforms files in-place (no separate `.llm.md` files)
|
268
|
+
- Excludes password-protected enterprise documentation
|
269
|
+
- Runs as part of an automated daily deployment via GitHub Actions
|
270
|
+
|
271
|
+
**Result**: Over 140 lines of custom code replaced with a 6-line configuration file.
|
272
|
+
|
273
|
+
### Usage Examples
|
274
|
+
|
275
|
+
```bash
|
276
|
+
# Transform all files with default settings (creates .llm.md files)
|
166
277
|
llms-txt bulk-transform --docs ./wiki
|
167
278
|
|
168
|
-
#
|
279
|
+
# Transform in-place using config file
|
169
280
|
llms-txt bulk-transform --config karafka-config.yml
|
281
|
+
|
282
|
+
# Verbose output to see processing details
|
283
|
+
llms-txt bulk-transform --config llms-txt.yml --verbose
|
170
284
|
```
|
171
285
|
|
172
286
|
### Example Config for Bulk Transformation
|
@@ -183,7 +297,7 @@ excludes:
|
|
183
297
|
- "**/old-docs/**" # Skip legacy documentation
|
184
298
|
```
|
185
299
|
|
186
|
-
### Example Output
|
300
|
+
### Example Output (Default Suffix)
|
187
301
|
|
188
302
|
With the config above, these files:
|
189
303
|
```
|
@@ -213,10 +327,26 @@ wiki/
|
|
213
327
|
└── internal.md ← Excluded, no .llm.md version
|
214
328
|
```
|
215
329
|
|
330
|
+
### Example Output (In-Place Transformation)
|
331
|
+
|
332
|
+
With `suffix: ""`, the original files are overwritten:
|
333
|
+
```
|
334
|
+
wiki/
|
335
|
+
├── Home.md ← Transformed in-place
|
336
|
+
├── getting-started.md ← Transformed in-place
|
337
|
+
├── api/
|
338
|
+
│ ├── consumers.md ← Transformed in-place
|
339
|
+
│ └── producers.md ← Transformed in-place
|
340
|
+
└── private/
|
341
|
+
└── internal.md ← Excluded from transformation
|
342
|
+
```
|
343
|
+
|
216
344
|
## Serving LLM-Friendly Documentation
|
217
345
|
|
218
346
|
After using `bulk-transform` to create `.llm.md` versions of your documentation, you can configure your web server to automatically serve these LLM-optimized versions to AI bots while showing the original versions to human visitors.
|
219
347
|
|
348
|
+
> **Note:** This section applies when using the default `suffix: .llm` behavior. If you're using `suffix: ""` for in-place transformation, the markdown files are already LLM-optimized and can be served directly.
|
349
|
+
|
220
350
|
### How It Works
|
221
351
|
|
222
352
|
The strategy is simple:
|
@@ -341,7 +471,7 @@ export default {
|
|
341
471
|
|
342
472
|
### Custom Suffix
|
343
473
|
|
344
|
-
If you used a different suffix with the `bulk-transform` command (e.g.,
|
474
|
+
If you used a different suffix with the `bulk-transform` command (e.g., `suffix: .ai`), update your web server rules accordingly.
|
345
475
|
|
346
476
|
**Apache:**
|
347
477
|
```apache
|
@@ -358,24 +488,6 @@ rewrite ^(.*)\.md$ $1.ai.md last;
|
|
358
488
|
const llmPath = url.pathname.replace(/\.md$/, '.ai.md');
|
359
489
|
```
|
360
490
|
|
361
|
-
### Example Setup
|
362
|
-
|
363
|
-
```yaml
|
364
|
-
# llms-txt.yml
|
365
|
-
docs: ./docs
|
366
|
-
base_url: https://myproject.io
|
367
|
-
suffix: .llm
|
368
|
-
convert_urls: true
|
369
|
-
```
|
370
|
-
|
371
|
-
```bash
|
372
|
-
# Generate LLM-friendly versions
|
373
|
-
llms-txt bulk-transform --config llms-txt.yml
|
374
|
-
|
375
|
-
# Deploy both original and .llm.md files to your web server
|
376
|
-
# The server will automatically serve the right version to each visitor
|
377
|
-
```
|
378
|
-
|
379
491
|
## Ruby API
|
380
492
|
|
381
493
|
### Basic Usage
|
@@ -410,7 +522,7 @@ transformed = LlmsTxt.transform_markdown('README.md',
|
|
410
522
|
convert_urls: true
|
411
523
|
)
|
412
524
|
|
413
|
-
# Bulk transform all files in directory
|
525
|
+
# Bulk transform all files in directory (creates .llm.md files)
|
414
526
|
transformed_files = LlmsTxt.bulk_transform('./wiki',
|
415
527
|
base_url: 'https://karafka.io',
|
416
528
|
suffix: '.llm',
|
@@ -418,6 +530,14 @@ transformed_files = LlmsTxt.bulk_transform('./wiki',
|
|
418
530
|
)
|
419
531
|
puts "Transformed #{transformed_files.size} files"
|
420
532
|
|
533
|
+
# Bulk transform in-place (overwrites original files)
|
534
|
+
transformed_files = LlmsTxt.bulk_transform('./wiki',
|
535
|
+
base_url: 'https://karafka.io',
|
536
|
+
suffix: '', # Empty string for in-place transformation
|
537
|
+
convert_urls: true,
|
538
|
+
excludes: ['**/private/**']
|
539
|
+
)
|
540
|
+
|
421
541
|
# Bulk transform with config file
|
422
542
|
transformed_files = LlmsTxt.bulk_transform('./wiki',
|
423
543
|
config_file: 'karafka-config.yml'
|
data/bin/rspecs
ADDED
data/lib/llms_txt/bulk_transformer.rb
CHANGED
@@ -47,9 +47,7 @@ module LlmsTxt
|
|
47
47
|
#
|
48
48
|
# @return [Array<String>] paths of transformed files
|
49
49
|
def transform_all
|
50
|
-
unless File.directory?(docs_path)
|
51
|
-
raise Errors::GenerationError, "Directory not found: #{docs_path}"
|
52
|
-
end
|
50
|
+
raise Errors::GenerationError, "Directory not found: #{docs_path}" unless File.directory?(docs_path)
|
53
51
|
|
54
52
|
markdown_files = find_markdown_files
|
55
53
|
transformed_files = []
|
data/lib/llms_txt/cli.rb
CHANGED
@@ -3,11 +3,30 @@
|
|
3
3
|
require 'optparse'
|
4
4
|
|
5
5
|
module LlmsTxt
|
6
|
+
# Command-line interface for llms-txt gem
|
7
|
+
#
|
8
|
+
# Provides commands for generating, transforming, parsing, and validating llms.txt files.
|
9
|
+
# All file paths must be specified using flags (-d/--docs) for consistency.
|
10
|
+
#
|
11
|
+
# @example Run the CLI
|
12
|
+
# LlmsTxt::CLI.run(['generate', '--docs', './docs', '--output', 'llms.txt'])
|
13
|
+
#
|
14
|
+
# @api public
|
6
15
|
class CLI
|
16
|
+
# Run the CLI with given arguments
|
17
|
+
#
|
18
|
+
# @param argv [Array<String>] command-line arguments (defaults to ARGV)
|
7
19
|
def self.run(argv = ARGV)
|
8
20
|
new.run(argv)
|
9
21
|
end
|
10
22
|
|
23
|
+
# Execute CLI command with error handling
|
24
|
+
#
|
25
|
+
# Parses command-line arguments and delegates to appropriate command handler.
|
26
|
+
# Handles all LlmsTxt errors gracefully with user-friendly messages.
|
27
|
+
#
|
28
|
+
# @param argv [Array<String>] command-line arguments
|
29
|
+
# @raise [SystemExit] exits with status 1 on error
|
11
30
|
def run(argv)
|
12
31
|
options = parse_options(argv)
|
13
32
|
|
@@ -40,6 +59,12 @@ module LlmsTxt
|
|
40
59
|
|
41
60
|
private
|
42
61
|
|
62
|
+
# Parse command-line options using OptionParser
|
63
|
+
#
|
64
|
+
# Extracts command and options from argv. First non-flag argument is treated as command name.
|
65
|
+
#
|
66
|
+
# @param argv [Array<String>] command-line arguments
|
67
|
+
# @return [Hash] parsed options including :command, :config, :docs, :output, :verbose
|
43
68
|
def parse_options(argv)
|
44
69
|
options = {
|
45
70
|
command: argv.first&.match?(/^[a-z-]+$/) ? argv.shift : nil
|
@@ -87,10 +112,20 @@ module LlmsTxt
|
|
87
112
|
end
|
88
113
|
end.parse!(argv)
|
89
114
|
|
90
|
-
options[:file_path] = argv.first if argv.any?
|
91
115
|
options
|
92
116
|
end
|
93
117
|
|
118
|
+
# Generate llms.txt from documentation directory or file
|
119
|
+
#
|
120
|
+
# Loads configuration, merges with CLI options, generates llms.txt content,
|
121
|
+
# and optionally validates the output.
|
122
|
+
#
|
123
|
+
# @param options [Hash] command options from parse_options
|
124
|
+
# @option options [String] :config path to config file
|
125
|
+
# @option options [String] :docs path to documentation
|
126
|
+
# @option options [String] :output output file path
|
127
|
+
# @option options [Boolean] :verbose enable verbose output
|
128
|
+
# @raise [SystemExit] exits with status 1 if docs path not found
|
94
129
|
def generate(options)
|
95
130
|
# Load config and merge with CLI options
|
96
131
|
config = LlmsTxt::Config.new(options[:config])
|
@@ -111,26 +146,38 @@ module LlmsTxt
|
|
111
146
|
File.write(output_path, content)
|
112
147
|
puts "Successfully generated #{output_path}"
|
113
148
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
149
|
+
return unless merged_options[:verbose]
|
150
|
+
|
151
|
+
validator = LlmsTxt::Validator.new(content)
|
152
|
+
if validator.valid?
|
153
|
+
puts 'Valid llms.txt format'
|
154
|
+
else
|
155
|
+
puts 'Validation warnings:'
|
156
|
+
validator.errors.each { |error| puts " - #{error}" }
|
122
157
|
end
|
123
158
|
end
|
124
159
|
|
160
|
+
# Transform markdown file to be AI-friendly
|
161
|
+
#
|
162
|
+
# Expands relative links to absolute URLs and optionally converts HTML URLs to markdown format.
|
163
|
+
#
|
164
|
+
# @param options [Hash] command options from parse_options
|
165
|
+
# @option options [String] :config path to config file
|
166
|
+
# @option options [String] :docs path to markdown file (required)
|
167
|
+
# @option options [String] :output output file path
|
168
|
+
# @option options [String] :base_url base URL for link expansion
|
169
|
+
# @option options [Boolean] :convert_urls convert .html to .md
|
170
|
+
# @option options [Boolean] :verbose enable verbose output
|
171
|
+
# @raise [SystemExit] exits with status 1 if file not found or -d flag missing
|
125
172
|
def transform(options)
|
126
173
|
# Load config and merge with CLI options
|
127
174
|
config = LlmsTxt::Config.new(options[:config])
|
128
175
|
merged_options = config.merge_with_options(options)
|
129
176
|
|
130
|
-
file_path =
|
177
|
+
file_path = merged_options[:docs]
|
131
178
|
|
132
179
|
unless file_path
|
133
|
-
puts
|
180
|
+
puts 'File path required for transform command (use -d/--docs)'
|
134
181
|
exit 1
|
135
182
|
end
|
136
183
|
|
@@ -151,6 +198,19 @@ module LlmsTxt
|
|
151
198
|
end
|
152
199
|
end
|
153
200
|
|
201
|
+
# Transform all markdown files in directory recursively
|
202
|
+
#
|
203
|
+
# Creates AI-friendly versions of all markdown files with configurable suffix and exclusions.
|
204
|
+
#
|
205
|
+
# @param options [Hash] command options from parse_options
|
206
|
+
# @option options [String] :config path to config file
|
207
|
+
# @option options [String] :docs path to documentation directory (required)
|
208
|
+
# @option options [String] :suffix suffix for transformed files (default: '.llm')
|
209
|
+
# @option options [Array<String>] :excludes glob patterns to exclude
|
210
|
+
# @option options [String] :base_url base URL for link expansion
|
211
|
+
# @option options [Boolean] :convert_urls convert .html to .md
|
212
|
+
# @option options [Boolean] :verbose enable verbose output
|
213
|
+
# @raise [SystemExit] exits with status 1 if directory not found or transformation fails
|
154
214
|
def bulk_transform(options)
|
155
215
|
# Load config and merge with CLI options
|
156
216
|
config = LlmsTxt::Config.new(options[:config])
|
@@ -170,16 +230,23 @@ module LlmsTxt
|
|
170
230
|
|
171
231
|
puts "Bulk transforming markdown files in #{docs_path}..." if merged_options[:verbose]
|
172
232
|
puts "Using suffix: #{merged_options[:suffix]}" if merged_options[:verbose]
|
173
|
-
|
233
|
+
if merged_options[:verbose] && !merged_options[:excludes].empty?
|
234
|
+
puts "Excludes: #{merged_options[:excludes].join(', ')}"
|
235
|
+
end
|
174
236
|
|
175
237
|
begin
|
176
238
|
transformed_files = LlmsTxt.bulk_transform(docs_path, merged_options)
|
177
239
|
|
178
240
|
if transformed_files.empty?
|
179
|
-
puts
|
241
|
+
puts 'No markdown files found to transform'
|
180
242
|
else
|
181
243
|
puts "Successfully transformed #{transformed_files.size} files:"
|
182
|
-
|
244
|
+
# verbose mode already shows progress
|
245
|
+
unless merged_options[:verbose]
|
246
|
+
transformed_files.each do |file|
|
247
|
+
puts " #{file}"
|
248
|
+
end
|
249
|
+
end
|
183
250
|
end
|
184
251
|
rescue LlmsTxt::Errors::BaseError => e
|
185
252
|
puts "Error during bulk transformation: #{e.message}"
|
@@ -187,8 +254,22 @@ module LlmsTxt
|
|
187
254
|
end
|
188
255
|
end
|
189
256
|
|
257
|
+
# Parse existing llms.txt file and display information
|
258
|
+
#
|
259
|
+
# Reads and parses llms.txt file, displaying title, description, and links.
|
260
|
+
# Defaults to 'llms.txt' in current directory if no file specified.
|
261
|
+
#
|
262
|
+
# @param options [Hash] command options from parse_options
|
263
|
+
# @option options [String] :config path to config file
|
264
|
+
# @option options [String] :docs path to llms.txt file (defaults to 'llms.txt')
|
265
|
+
# @option options [Boolean] :verbose enable verbose output with link counts
|
266
|
+
# @raise [SystemExit] exits with status 1 if file not found
|
190
267
|
def parse(options)
|
191
|
-
|
268
|
+
# Load config and merge with CLI options
|
269
|
+
config = LlmsTxt::Config.new(options[:config])
|
270
|
+
merged_options = config.merge_with_options(options)
|
271
|
+
|
272
|
+
file_path = merged_options[:docs] || 'llms.txt'
|
192
273
|
|
193
274
|
unless File.exist?(file_path)
|
194
275
|
puts "File not found: #{file_path}"
|
@@ -203,13 +284,26 @@ module LlmsTxt
|
|
203
284
|
puts "Documentation Links: #{parsed.documentation_links.size}"
|
204
285
|
puts "Example Links: #{parsed.example_links.size}" if parsed.respond_to?(:example_links)
|
205
286
|
puts "Optional Links: #{parsed.optional_links.size}" if parsed.respond_to?(:optional_links)
|
206
|
-
|
207
|
-
puts parsed.to_xml
|
287
|
+
elsif parsed.respond_to?(:to_xml)
|
288
|
+
puts parsed.to_xml
|
208
289
|
end
|
209
290
|
end
|
210
291
|
|
292
|
+
# Validate llms.txt file format
|
293
|
+
#
|
294
|
+
# Checks if llms.txt file follows proper format with title, description, and documentation links.
|
295
|
+
# Defaults to 'llms.txt' in current directory if no file specified.
|
296
|
+
#
|
297
|
+
# @param options [Hash] command options from parse_options
|
298
|
+
# @option options [String] :config path to config file
|
299
|
+
# @option options [String] :docs path to llms.txt file (defaults to 'llms.txt')
|
300
|
+
# @raise [SystemExit] exits with status 1 if file not found or invalid
|
211
301
|
def validate(options)
|
212
|
-
|
302
|
+
# Load config and merge with CLI options
|
303
|
+
config = LlmsTxt::Config.new(options[:config])
|
304
|
+
merged_options = config.merge_with_options(options)
|
305
|
+
|
306
|
+
file_path = merged_options[:docs] || 'llms.txt'
|
213
307
|
|
214
308
|
unless File.exist?(file_path)
|
215
309
|
puts "File not found: #{file_path}"
|
@@ -231,6 +325,8 @@ module LlmsTxt
|
|
231
325
|
end
|
232
326
|
end
|
233
327
|
|
328
|
+
# Display version information
|
329
|
+
#
|
234
330
|
def show_version
|
235
331
|
puts "llms-txt version #{LlmsTxt::VERSION}"
|
236
332
|
end
|
data/lib/llms_txt/config.rb
CHANGED
@@ -62,8 +62,11 @@ module LlmsTxt
|
|
62
62
|
title: options[:title] || self['title'],
|
63
63
|
description: options[:description] || self['description'],
|
64
64
|
output: options[:output] || self['output'] || 'llms.txt',
|
65
|
-
convert_urls: options.key?(:convert_urls)
|
66
|
-
|
65
|
+
convert_urls: if options.key?(:convert_urls)
|
66
|
+
options[:convert_urls]
|
67
|
+
else
|
68
|
+
self['convert_urls'] || false
|
69
|
+
end,
|
67
70
|
verbose: options.key?(:verbose) ? options[:verbose] : (self['verbose'] || false),
|
68
71
|
# Bulk transformation options
|
69
72
|
suffix: options[:suffix] || self['suffix'] || '.llm',
|
data/lib/llms_txt/generator.rb
CHANGED
@@ -43,7 +43,7 @@ module LlmsTxt
|
|
43
43
|
|
44
44
|
content = build_llms_txt(docs)
|
45
45
|
|
46
|
-
if output_path = options[:output]
|
46
|
+
if (output_path = options[:output])
|
47
47
|
File.write(output_path, content)
|
48
48
|
end
|
49
49
|
|
@@ -95,10 +95,10 @@ module LlmsTxt
|
|
95
95
|
def analyze_file(file_path)
|
96
96
|
# Handle single file case differently
|
97
97
|
relative_path = if File.file?(docs_path)
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
98
|
+
File.basename(file_path)
|
99
|
+
else
|
100
|
+
Pathname.new(file_path).relative_path_from(Pathname.new(docs_path)).to_s
|
101
|
+
end
|
102
102
|
|
103
103
|
content = File.read(file_path)
|
104
104
|
|
@@ -120,7 +120,7 @@ module LlmsTxt
|
|
120
120
|
def extract_title(content, file_path)
|
121
121
|
# Try to extract title from first # header
|
122
122
|
if content.match(/^#\s+(.+)/)
|
123
|
-
|
123
|
+
::Regexp.last_match(1).strip
|
124
124
|
else
|
125
125
|
# Use filename as fallback
|
126
126
|
File.basename(file_path, '.md').gsub(/[_-]/, ' ').split.map(&:capitalize).join(' ')
|
@@ -176,25 +176,25 @@ module LlmsTxt
|
|
176
176
|
|
177
177
|
content = []
|
178
178
|
content << "# #{title}"
|
179
|
-
content <<
|
179
|
+
content << ''
|
180
180
|
content << "> #{description}" if description
|
181
|
-
content <<
|
181
|
+
content << ''
|
182
182
|
|
183
183
|
if docs.any?
|
184
|
-
content <<
|
185
|
-
content <<
|
184
|
+
content << '## Documentation'
|
185
|
+
content << ''
|
186
186
|
|
187
187
|
docs.each do |doc|
|
188
188
|
url = build_url(doc[:path])
|
189
|
-
if doc[:description] && !doc[:description].empty?
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
189
|
+
content << if doc[:description] && !doc[:description].empty?
|
190
|
+
"- [#{doc[:title]}](#{url}): #{doc[:description]}"
|
191
|
+
else
|
192
|
+
"- [#{doc[:title]}](#{url})"
|
193
|
+
end
|
194
194
|
end
|
195
195
|
end
|
196
196
|
|
197
|
-
content.join("\n")
|
197
|
+
"#{content.join("\n")}\n"
|
198
198
|
end
|
199
199
|
|
200
200
|
# Attempts to detect project title from README or directory name
|
@@ -224,7 +224,7 @@ module LlmsTxt
|
|
224
224
|
# @param path [String] relative path to file
|
225
225
|
# @return [String] full URL or relative path
|
226
226
|
def build_url(path)
|
227
|
-
if base_url = options[:base_url]
|
227
|
+
if (base_url = options[:base_url])
|
228
228
|
File.join(base_url, path)
|
229
229
|
else
|
230
230
|
path
|
data/lib/llms_txt/markdown_transformer.rb
CHANGED
@@ -61,8 +61,8 @@ module LlmsTxt
|
|
61
61
|
base_url = options[:base_url]
|
62
62
|
|
63
63
|
content.gsub(/\[([^\]]+)\]\(([^)]+)\)/) do |match|
|
64
|
-
text =
|
65
|
-
url =
|
64
|
+
text = ::Regexp.last_match(1)
|
65
|
+
url = ::Regexp.last_match(2)
|
66
66
|
|
67
67
|
if url.start_with?('http://', 'https://', '//', '#')
|
68
68
|
match # Already absolute or anchor
|
data/lib/llms_txt/validator.rb
CHANGED
@@ -80,9 +80,7 @@ module LlmsTxt
|
|
80
80
|
def validate_required_sections
|
81
81
|
lines = content.lines
|
82
82
|
|
83
|
-
unless lines.first&.start_with?('# ')
|
84
|
-
errors << 'Missing required H1 title (must start with "# ")'
|
85
|
-
end
|
83
|
+
errors << 'Missing required H1 title (must start with "# ")' unless lines.first&.start_with?('# ')
|
86
84
|
|
87
85
|
return unless lines.first&.strip&.length.to_i > 80
|
88
86
|
|
@@ -153,9 +151,7 @@ module LlmsTxt
|
|
153
151
|
lib/
|
154
152
|
).*$
|
155
153
|
}x
|
156
|
-
unless url =~ url_pattern
|
157
|
-
errors << "Invalid URL format: #{url}"
|
158
|
-
end
|
154
|
+
errors << "Invalid URL format: #{url}" unless url =~ url_pattern
|
159
155
|
end
|
160
156
|
end
|
161
157
|
|
@@ -192,9 +188,9 @@ module LlmsTxt
|
|
192
188
|
#
|
193
189
|
# Warns about non-HTTPS URLs and URLs containing spaces
|
194
190
|
def validate_links
|
195
|
-
|
191
|
+
urls = content.scan(/\[([^\]]+)\]\(([^)]+)\)/).map(&:last)
|
196
192
|
|
197
|
-
|
193
|
+
urls.each do |url|
|
198
194
|
if url.start_with?('http') && !url.start_with?('https')
|
199
195
|
errors << "Non-HTTPS URL found: #{url} (consider using HTTPS)"
|
200
196
|
end
|
@@ -207,9 +203,7 @@ module LlmsTxt
|
|
207
203
|
#
|
208
204
|
# Enforces 50KB file size limit and 120 character line length limit
|
209
205
|
def validate_file_size
|
210
|
-
if content.bytesize > MAX_FILE_SIZE
|
211
|
-
errors << "File size exceeds maximum (#{MAX_FILE_SIZE} bytes)"
|
212
|
-
end
|
206
|
+
errors << "File size exceeds maximum (#{MAX_FILE_SIZE} bytes)" if content.bytesize > MAX_FILE_SIZE
|
213
207
|
|
214
208
|
lines = content.lines
|
215
209
|
lines.each_with_index do |line, index|
|
data/lib/llms_txt/version.rb
CHANGED
data/lib/llms_txt.rb
CHANGED
data/renovate.json
CHANGED
@@ -10,10 +10,13 @@
|
|
10
10
|
"enabled": true,
|
11
11
|
"pinDigests": true
|
12
12
|
},
|
13
|
-
"ruby": {
|
14
|
-
"enabled": true
|
15
|
-
},
|
16
13
|
"packageRules": [
|
14
|
+
{
|
15
|
+
"matchCategories": [
|
16
|
+
"ruby"
|
17
|
+
],
|
18
|
+
"enabled": true
|
19
|
+
},
|
17
20
|
{
|
18
21
|
"matchManagers": [
|
19
22
|
"github-actions"
|
@@ -27,4 +30,4 @@
|
|
27
30
|
"minimumReleaseAge": "7 days"
|
28
31
|
}
|
29
32
|
]
|
30
|
-
}
|
33
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llms-txt-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maciej Mensfeld
|
@@ -104,6 +104,7 @@ email:
|
|
104
104
|
- maciej@mensfeld.pl
|
105
105
|
executables:
|
106
106
|
- llms-txt
|
107
|
+
- rspecs
|
107
108
|
extensions: []
|
108
109
|
extra_rdoc_files: []
|
109
110
|
files:
|
@@ -119,6 +120,7 @@ files:
|
|
119
120
|
- README.md
|
120
121
|
- Rakefile
|
121
122
|
- bin/llms-txt
|
123
|
+
- bin/rspecs
|
122
124
|
- lib/llms_txt.rb
|
123
125
|
- lib/llms_txt/bulk_transformer.rb
|
124
126
|
- lib/llms_txt/cli.rb
|