llms-txt-ruby 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57be54434134c87cc4939264861e7e64f1b721aeef9e52ebe11080b3f1ab08d1
4
- data.tar.gz: d413c3a1902cd259f9af274aefbcaf65df9714ba7e6ae958de2afc30101b4778
3
+ metadata.gz: daed7a0ad2c84969d85309328b3bbb773a43b20f70f8b9f610aa8bdcbe0a0510
4
+ data.tar.gz: '0795721bd23fe13fdcfeac9745ca7bbe0ce49a1a56ddc62a07fba49a047de65d'
5
5
  SHA512:
6
- metadata.gz: 43ab2bcc770fb9f0486363b4904f006d6b462a6adfae1b9e7d05d8a1aed5c2b4356f02ca2737ca4c6e1d0a333f132d621d5cbb1033a28afcff8c8bafcd690b4a
7
- data.tar.gz: ba760b69ca401f1b3cf0e5ec14a9e76bef92e86c5804a040b03b203aa6af1e39361cbe8fb5e8fbb84ecbf48d557cb3ccd49e194f0e7e2eaeb872f0f7f07a171b
6
+ metadata.gz: a8e4943867227ae1f031fedce39c7b30e46180295af86a99cda9f40404023127355ee23130a769d7bafe326c2dcce0aa9be694d9b11cdb0ea436305514443047
7
+ data.tar.gz: 2943403f4a90f1dfab03ec1e106ceeec7e5534ed61b7cf1ae26b16461ea24642b52a413552eb0cd508a810b59902541ee922ef3eacbbacab0b547da9b88a8e47
@@ -0,0 +1,71 @@
1
+ name: CI
2
+
3
+ concurrency:
4
+ group: ${{ github.workflow }}-${{ github.ref }}
5
+ cancel-in-progress: true
6
+
7
+ on:
8
+ pull_request:
9
+ branches: [ master ]
10
+ push:
11
+ branches: [ master ]
12
+ schedule:
13
+ - cron: '0 1 * * *'
14
+
15
+ permissions:
16
+ contents: read
17
+
18
+ jobs:
19
+ specs:
20
+ timeout-minutes: 15
21
+ runs-on: ubuntu-latest
22
+ strategy:
23
+ fail-fast: false
24
+ matrix:
25
+ ruby:
26
+ - '3.4'
27
+ - '3.3'
28
+ - '3.2'
29
+ include:
30
+ - ruby: '3.4'
31
+ coverage: 'true'
32
+ steps:
33
+ - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
34
+ with:
35
+ fetch-depth: 0
36
+
37
+ - name: Set up Ruby
38
+ uses: ruby/setup-ruby@v1
39
+ with:
40
+ ruby-version: ${{ matrix.ruby }}
41
+ bundler-cache: true
42
+ bundler: 'latest'
43
+
44
+ - name: Install latest bundler
45
+ run: |
46
+ gem install bundler --no-document
47
+ bundle config set without 'tools benchmarks docs'
48
+
49
+ - name: Bundle install
50
+ run: bundle install --jobs 4 --retry 3
51
+
52
+ - name: Run all tests
53
+ env:
54
+ GITHUB_COVERAGE: ${{ matrix.coverage }}
55
+ run: bundle exec rspec
56
+
57
+
58
+ ci-success:
59
+ name: CI Success
60
+ runs-on: ubuntu-latest
61
+ if: always()
62
+ needs:
63
+ - specs
64
+ steps:
65
+ - name: Check all jobs passed
66
+ if: |
67
+ contains(needs.*.result, 'failure') ||
68
+ contains(needs.*.result, 'cancelled') ||
69
+ contains(needs.*.result, 'skipped')
70
+ run: exit 1
71
+ - run: echo "All CI checks passed!"
@@ -0,0 +1,35 @@
1
+ name: Push Gem
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - v*
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ push:
13
+ if: github.repository_owner == 'mensfeld'
14
+ runs-on: ubuntu-latest
15
+ environment: deployment
16
+
17
+ permissions:
18
+ contents: write
19
+ id-token: write
20
+
21
+ steps:
22
+ - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
23
+ with:
24
+ fetch-depth: 0
25
+
26
+ - name: Set up Ruby
27
+ uses: ruby/setup-ruby@0481980f17b760ef6bca5e8c55809102a0af1e5a # v1.263.0
28
+ with:
29
+ bundler-cache: false
30
+
31
+ - name: Bundle install
32
+ run: |
33
+ bundle install --jobs 4 --retry 3
34
+
35
+ - uses: rubygems/release-gem@a25424ba2ba8b387abc8ef40807c2c85b96cbe32 # v1.1.1
data/.gitignore CHANGED
@@ -11,7 +11,8 @@
11
11
  /tmp/
12
12
 
13
13
  # Used by dotenv library to load environment variables.
14
- # .env
14
+ .env
15
+ .env.*
15
16
 
16
17
  # Ignore Byebug command history file.
17
18
  .byebug_history
@@ -54,3 +55,11 @@ build-iPhoneSimulator/
54
55
 
55
56
  # Used by RuboCop. Remote config files pulled in from inherit_from directive.
56
57
  # .rubocop-https?--*
58
+
59
+ # Project-specific generated files
60
+ llms.txt
61
+ *-output.txt
62
+
63
+ # Config files that might contain sensitive data
64
+ llms-txt.yml
65
+ .llms-txt.yml
data/.rubocop.yml ADDED
@@ -0,0 +1,27 @@
1
+ AllCops:
2
+ TargetRubyVersion: 3.1
3
+ NewCops: enable
4
+
5
+ Style/Documentation:
6
+ Enabled: false
7
+
8
+ Style/StringLiterals:
9
+ EnforcedStyle: single_quotes
10
+
11
+ Layout/LineLength:
12
+ Max: 120
13
+
14
+ Metrics/ClassLength:
15
+ Max: 150
16
+
17
+ Metrics/MethodLength:
18
+ Max: 20
19
+
20
+ Metrics/AbcSize:
21
+ Max: 20
22
+
23
+ Metrics/CyclomaticComplexity:
24
+ Max: 10
25
+
26
+ Style/FrozenStringLiteralComment:
27
+ Enabled: true
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.4.5
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gemspec
6
+
7
+ group :development do
8
+ gem 'pry'
9
+ gem 'pry-byebug'
10
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,88 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ llms-txt-ruby (0.1.0)
5
+ zeitwerk (~> 2.6)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ ast (2.4.3)
11
+ byebug (12.0.0)
12
+ coderay (1.1.3)
13
+ diff-lcs (1.6.2)
14
+ docile (1.4.1)
15
+ json (2.13.2)
16
+ language_server-protocol (3.17.0.5)
17
+ lint_roller (1.1.0)
18
+ method_source (1.1.0)
19
+ parallel (1.27.0)
20
+ parser (3.3.9.0)
21
+ ast (~> 2.4.1)
22
+ racc
23
+ prism (1.4.0)
24
+ pry (0.15.2)
25
+ coderay (~> 1.1)
26
+ method_source (~> 1.0)
27
+ pry-byebug (3.11.0)
28
+ byebug (~> 12.0)
29
+ pry (>= 0.13, < 0.16)
30
+ racc (1.8.1)
31
+ rainbow (3.1.1)
32
+ rake (13.3.0)
33
+ regexp_parser (2.11.2)
34
+ rspec (3.13.1)
35
+ rspec-core (~> 3.13.0)
36
+ rspec-expectations (~> 3.13.0)
37
+ rspec-mocks (~> 3.13.0)
38
+ rspec-core (3.13.5)
39
+ rspec-support (~> 3.13.0)
40
+ rspec-expectations (3.13.5)
41
+ diff-lcs (>= 1.2.0, < 2.0)
42
+ rspec-support (~> 3.13.0)
43
+ rspec-mocks (3.13.5)
44
+ diff-lcs (>= 1.2.0, < 2.0)
45
+ rspec-support (~> 3.13.0)
46
+ rspec-support (3.13.5)
47
+ rubocop (1.80.0)
48
+ json (~> 2.3)
49
+ language_server-protocol (~> 3.17.0.2)
50
+ lint_roller (~> 1.1.0)
51
+ parallel (~> 1.10)
52
+ parser (>= 3.3.0.2)
53
+ rainbow (>= 2.2.2, < 4.0)
54
+ regexp_parser (>= 2.9.3, < 3.0)
55
+ rubocop-ast (>= 1.46.0, < 2.0)
56
+ ruby-progressbar (~> 1.7)
57
+ unicode-display_width (>= 2.4.0, < 4.0)
58
+ rubocop-ast (1.46.0)
59
+ parser (>= 3.3.7.2)
60
+ prism (~> 1.4)
61
+ ruby-progressbar (1.13.0)
62
+ simplecov (0.22.0)
63
+ docile (~> 1.1)
64
+ simplecov-html (~> 0.11)
65
+ simplecov_json_formatter (~> 0.1)
66
+ simplecov-html (0.13.2)
67
+ simplecov_json_formatter (0.1.4)
68
+ unicode-display_width (3.1.5)
69
+ unicode-emoji (~> 4.0, >= 4.0.4)
70
+ unicode-emoji (4.0.4)
71
+ zeitwerk (2.7.3)
72
+
73
+ PLATFORMS
74
+ ruby
75
+ x86_64-linux
76
+
77
+ DEPENDENCIES
78
+ bundler (~> 2.0)
79
+ llms-txt-ruby!
80
+ pry
81
+ pry-byebug
82
+ rake (~> 13.0)
83
+ rspec (~> 3.0)
84
+ rubocop (~> 1.0)
85
+ simplecov (~> 0.21)
86
+
87
+ BUNDLED WITH
88
+ 2.7.1
data/README.md CHANGED
@@ -1,23 +1,30 @@
1
1
  # llms-txt-ruby
2
2
 
3
- > ⚠️ **Work in Progress** - This gem is currently under active development and not yet ready for any use.
3
+ [![CI](https://github.com/mensfeld/llms-txt-ruby/actions/workflows/ci.yml/badge.svg)](
4
+ https://github.com/mensfeld/llms-txt-ruby/actions/workflows/ci.yml)
4
5
 
5
- A Ruby gem that automatically generates [llms.txt](https://llmstxt.org/) files for Ruby projects using AI. This gem analyzes your Ruby codebase, extracts documentation from YARD comments, README files, and gemspec metadata, then uses a Large Language Model to create a properly formatted llms.txt file following the official specification.
6
+ A Ruby tool for generating [llms.txt](https://llmstxt.org/) files from existing markdown
7
+ documentation. Transform your docs to be AI-friendly.
6
8
 
7
9
  ## What is llms.txt?
8
10
 
9
- The llms.txt file is a proposed standard for providing LLM-friendly content on websites. It offers brief background information, guidance, and links to detailed markdown files, helping Large Language Models understand and navigate your project more effectively.
11
+ The llms.txt file is a proposed standard for providing LLM-friendly content on websites. It
12
+ offers brief background information, guidance, and links to detailed markdown files, helping
13
+ Large Language Models understand and navigate your project more effectively.
10
14
 
11
15
  Learn more at [llmstxt.org](https://llmstxt.org/).
12
16
 
13
- ## Features
17
+ ## What This Tool Does
14
18
 
15
- - 🤖 **AI-powered generation**: Uses Claude or GPT models to create natural, comprehensive llms.txt files
16
- - 📚 **YARD integration**: Extracts rich documentation from YARD comments and tags
17
- - 🔧 **Configurable**: Supports multiple LLM providers and customizable options
18
- - 🖥️ **CLI + API**: Use from command line or integrate into your Ruby applications
19
- - 📁 **Project awareness**: Understands Ruby project structure and conventions
20
- - 🎯 **Spec compliant**: Generates files that strictly follow the llms.txt specification
19
+ This library converts existing human-first documentation into LLM-friendly formats:
20
+
21
+ 1. **Generates llms.txt** - Transforms your existing markdown documentation into a structured
22
+ overview that helps LLMs understand your project's layout and find relevant information
23
+ 2. **Transforms markdown** - Converts individual markdown files from human-readable format to
24
+ AI-optimized format by expanding relative links to absolute URLs and normalizing link
25
+ structures
26
+ 3. **Bulk transforms** - Processes all markdown files in a directory recursively, creating
27
+ LLM-friendly versions alongside originals with customizable exclusion patterns
21
28
 
22
29
  ## Installation
23
30
 
@@ -39,38 +46,280 @@ Or install it yourself as:
39
46
  $ gem install llms-txt-ruby
40
47
  ```
41
48
 
42
- ## Example Output
49
+ ## Quick Start
43
50
 
44
- Here's what a generated llms.txt file might look like:
51
+ ### Option 1: Using Config File (Recommended)
45
52
 
46
- ```markdown
47
- # MyAwesomeGem
53
+ Create a `llms-txt.yml` file in your project root:
48
54
 
49
- > MyAwesomeGem is a Ruby library for processing data with advanced algorithms and providing a clean API for developers.
55
+ ```yaml
56
+ # llms-txt.yml
57
+ docs: ./docs
58
+ base_url: https://myproject.io
59
+ title: My Awesome Project
60
+ description: A Ruby library that helps developers build amazing applications
61
+ output: llms.txt
62
+ convert_urls: true
63
+ verbose: false
64
+ ```
50
65
 
51
- This gem provides a comprehensive toolkit for data processing, featuring both synchronous and asynchronous processing capabilities. It includes built-in caching, error handling, and extensive configuration options.
66
+ Then simply run:
52
67
 
53
- ## Documentation
68
+ ```bash
69
+ llms-txt generate
70
+ ```
54
71
 
55
- - [Getting Started Guide](docs/getting_started.md): Quick introduction and basic usage examples
56
- - [API Documentation](https://rubydoc.info/gems/my_awesome_gem): Complete API reference
57
- - [Configuration Guide](docs/configuration.md): Detailed configuration options
72
+ ### Option 2: Using CLI Only
58
73
 
59
- ## Examples
74
+ ```bash
75
+ # Generate from docs directory
76
+ llms-txt generate --docs ./docs
60
77
 
61
- - [Basic Usage Examples](examples/basic_usage.rb): Simple examples to get started
62
- - [Advanced Patterns](examples/advanced_patterns.rb): Complex usage patterns and best practices
78
+ # Transform a single file
79
+ llms-txt transform README.md
63
80
 
64
- ## Optional
81
+ # Transform all markdown files in directory
82
+ llms-txt bulk-transform --docs ./docs
65
83
 
66
- - [Contributing Guidelines](CONTRIBUTING.md): How to contribute to this project
67
- - [Changelog](CHANGELOG.md): Version history and changes
84
+ # Use custom config file
85
+ llms-txt generate --config my-config.yml
68
86
  ```
69
87
 
70
- ## License
88
+ ## CLI Reference
89
+
90
+ ### Commands
91
+
92
+ ```bash
93
+ llms-txt generate [options] # Generate llms.txt from documentation (default)
94
+ llms-txt transform [file] # Transform a markdown file to be AI-friendly
95
+ llms-txt bulk-transform [options] # Transform all markdown files in directory
96
+ llms-txt parse [file] # Parse existing llms.txt file
97
+ llms-txt validate [file] # Validate llms.txt file
98
+ llms-txt version # Show version
99
+ ```
100
+
101
+ ### Options
102
+
103
+ ```bash
104
+ -c, --config PATH Configuration file path (default: llms-txt.yml)
105
+ -d, --docs PATH Path to documentation directory or file
106
+ -o, --output PATH Output file path
107
+ -v, --verbose Verbose output
108
+ -h, --help Show help message
109
+ ```
110
+
111
+ *For advanced options like base_url, title, description, and convert_urls, use a config file.*
112
+
113
+ ## Configuration File
114
+
115
+ The recommended way to use llms-txt is with a `llms-txt.yml` config file. This allows you to:
116
+
117
+ - ✅ Store all your settings in one place
118
+ - ✅ Version control your llms.txt configuration
119
+ - ✅ Avoid typing long CLI commands repeatedly
120
+ - ✅ Share configuration across team members
71
121
 
72
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
122
+ ### Config File Options
73
123
 
74
- ---
124
+ ```yaml
125
+ # Path to documentation directory or file
126
+ docs: ./docs
127
+
128
+ # Base URL for expanding relative links (optional)
129
+ base_url: https://myproject.io
130
+
131
+ # Project information (optional - auto-detected if not provided)
132
+ title: My Project Name
133
+ description: Brief description of what your project does
134
+
135
+ # Output file (optional, default: llms.txt)
136
+ output: llms.txt
137
+
138
+ # Transformation options (optional)
139
+ convert_urls: true # Convert .html links to .md
140
+ verbose: false # Enable verbose output
141
+ ```
142
+
143
+ The config file will be automatically found if named:
144
+ - `llms-txt.yml`
145
+ - `llms-txt.yaml`
146
+ - `.llms-txt.yml`
147
+
148
+ ## Bulk Transformation
149
+
150
+ The `bulk-transform` command processes all markdown files in a directory recursively, creating
151
+ AI-friendly versions alongside the originals. This is perfect for transforming entire
152
+ documentation trees.
153
+
154
+ ### Key Features
155
+
156
+ - **Recursive processing** - Finds and transforms all `.md` files in nested directories
157
+ - **Preserves structure** - Maintains your existing directory layout
158
+ - **Exclusion patterns** - Skip files/directories using glob patterns
159
+ - **Custom suffixes** - Choose how transformed files are named
160
+ - **LLM optimizations** - Expands relative links, converts HTML URLs, etc.
161
+
162
+ ### Usage
163
+
164
+ ```bash
165
+ # Transform all files with default settings
166
+ llms-txt bulk-transform --docs ./wiki
167
+
168
+ # Using config file (recommended for complex setups)
169
+ llms-txt bulk-transform --config karafka-config.yml
170
+ ```
171
+
172
+ ### Example Config for Bulk Transformation
173
+
174
+ ```yaml
175
+ # karafka-config.yml
176
+ docs: ./wiki
177
+ base_url: https://karafka.io
178
+ suffix: .llm
179
+ convert_urls: true
180
+ excludes:
181
+ - "**/private/**" # Skip private directories
182
+ - "**/draft-*.md" # Skip draft files
183
+ - "**/old-docs/**" # Skip legacy documentation
184
+ ```
185
+
186
+ ### Example Output
187
+
188
+ With the config above, these files:
189
+ ```
190
+ wiki/
191
+ ├── Home.md
192
+ ├── getting-started.md
193
+ ├── api/
194
+ │ ├── consumers.md
195
+ │ └── producers.md
196
+ └── private/
197
+ └── internal.md
198
+ ```
199
+
200
+ Become:
201
+ ```
202
+ wiki/
203
+ ├── Home.md
204
+ ├── Home.llm.md ← AI-friendly version
205
+ ├── getting-started.md
206
+ ├── getting-started.llm.md
207
+ ├── api/
208
+ │ ├── consumers.md
209
+ │ ├── consumers.llm.md
210
+ │ ├── producers.md
211
+ │ └── producers.llm.md
212
+ └── private/
213
+ └── internal.md ← Excluded, no .llm.md version
214
+ ```
215
+
216
+ ## Ruby API
217
+
218
+ ### Basic Usage
219
+
220
+ ```ruby
221
+ require 'llms_txt'
222
+
223
+ # Option 1: Using config file (recommended)
224
+ content = LlmsTxt.generate_from_docs(config_file: 'llms-txt.yml')
225
+
226
+ # Option 2: Direct options (overrides config)
227
+ content = LlmsTxt.generate_from_docs('./docs',
228
+ base_url: 'https://myproject.io',
229
+ title: 'My Project',
230
+ description: 'A great project'
231
+ )
232
+
233
+ # Option 3: Mix config file with overrides
234
+ content = LlmsTxt.generate_from_docs('./docs',
235
+ config_file: 'my-config.yml',
236
+ title: 'Override Title' # This overrides config file title
237
+ )
238
+
239
+ # Transform markdown with config
240
+ transformed = LlmsTxt.transform_markdown('README.md',
241
+ config_file: 'llms-txt.yml'
242
+ )
243
+
244
+ # Transform with direct options
245
+ transformed = LlmsTxt.transform_markdown('README.md',
246
+ base_url: 'https://myproject.io',
247
+ convert_urls: true
248
+ )
249
+
250
+ # Bulk transform all files in directory
251
+ transformed_files = LlmsTxt.bulk_transform('./wiki',
252
+ base_url: 'https://karafka.io',
253
+ suffix: '.llm',
254
+ excludes: ['**/private/**', '**/draft-*.md']
255
+ )
256
+ puts "Transformed #{transformed_files.size} files"
257
+
258
+ # Bulk transform with config file
259
+ transformed_files = LlmsTxt.bulk_transform('./wiki',
260
+ config_file: 'karafka-config.yml'
261
+ )
262
+
263
+ # Parse an existing llms.txt file and validate generated content
264
+ parsed = LlmsTxt.parse('llms.txt')
265
+ puts parsed.title
266
+ puts parsed.description
267
+
268
+ valid = LlmsTxt.validate(content)
269
+ ```
270
+
271
+ ## How It Works
272
+
273
+ ### Generation Process
274
+
275
+ 1. **Scan for markdown files** - Finds all `.md` files in specified directory
276
+ 2. **Extract metadata** - Gets title and description from each file
277
+ 3. **Prioritize docs** - Orders by importance (README first, then guides, APIs, etc.)
278
+ 4. **Build llms.txt** - Creates properly formatted output with links and descriptions
279
+
280
+ ### Transformation Process
281
+
282
+ 1. **Expand relative links** - Convert `./docs/api.md` to `https://myproject.io/docs/api.md`
283
+ 2. **Convert URLs** - Change `.html` links to `.md` for better AI understanding
284
+ 3. **Preserve content** - No content modification, just link processing
285
+
286
+ ### File Prioritization
287
+
288
+ When generating llms.txt, files are automatically prioritized:
289
+
290
+ 1. **README files** - Always listed first
291
+ 2. **Getting Started guides** - Quick start documentation
292
+ 3. **Guides and tutorials** - Step-by-step content
293
+ 4. **API references** - Technical documentation
294
+ 5. **Other files** - Everything else
295
+
296
+ ## Example Output
297
+
298
+ Given a `docs/` directory with:
299
+ - `README.md`
300
+ - `getting-started.md`
301
+ - `api-reference.md`
302
+
303
+ Running `llms-txt generate --docs ./docs` with `base_url: https://myproject.io` set in the config file creates:
304
+
305
+ ```markdown
306
+ # My Project
307
+
308
+ > This is a Ruby library that helps developers build amazing applications with a clean, simple API.
309
+
310
+ ## Documentation
311
+
312
+ - [README](https://myproject.io/README.md): Complete overview and installation instructions
313
+ - [Getting Started](https://myproject.io/getting-started.md): Quick start guide with examples
314
+ - [API Reference](https://myproject.io/api-reference.md): Detailed API documentation and method
315
+ signatures
316
+ ```
317
+
318
+ ## Contributing
319
+
320
+ Bug reports and pull requests are welcome on GitHub at https://github.com/mensfeld/llms-txt-ruby.
321
+
322
+ ## License
75
323
 
76
- Made with ❤️ for the Ruby community
324
+ The gem is available as open source under the terms of the
325
+ [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+ require 'rubocop/rake_task'
6
+
7
+ RSpec::Core::RakeTask.new(:spec)
8
+ RuboCop::RakeTask.new
9
+
10
+ task default: %i[spec rubocop]