llm-docs-builder 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.dockerignore +44 -0
- data/.github/workflows/ci.yml +71 -0
- data/.github/workflows/docker.yml +102 -0
- data/.github/workflows/push.yml +35 -0
- data/.gitignore +66 -0
- data/.rubocop.yml +74 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +66 -0
- data/CLAUDE.md +178 -0
- data/Dockerfile +64 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +88 -0
- data/LICENSE +21 -0
- data/README.md +684 -0
- data/Rakefile +10 -0
- data/bin/llm-docs-builder +7 -0
- data/bin/rspecs +7 -0
- data/lib/llm_docs_builder/bulk_transformer.rb +135 -0
- data/lib/llm_docs_builder/cli.rb +434 -0
- data/lib/llm_docs_builder/comparator.rb +238 -0
- data/lib/llm_docs_builder/config.rb +116 -0
- data/lib/llm_docs_builder/errors.rb +31 -0
- data/lib/llm_docs_builder/generator.rb +234 -0
- data/lib/llm_docs_builder/markdown_transformer.rb +90 -0
- data/lib/llm_docs_builder/parser.rb +223 -0
- data/lib/llm_docs_builder/validator.rb +216 -0
- data/lib/llm_docs_builder/version.rb +6 -0
- data/lib/llm_docs_builder.rb +130 -0
- data/llm-docs-builder.gemspec +45 -0
- data/llm-docs-builder.yml +7 -0
- data/llm-docs-builder.yml.example +26 -0
- data/renovate.json +33 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 68bc5131179dcb94c393cb2973c9fa1ffbf868616effb6814a7e99d340923de2
|
4
|
+
data.tar.gz: c660150ac38f2687951ebd6f1e2f2014f4c5636aa4c322ef9114427b9d49a235
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 45bf6443d05c9173bf308b80240595e509d596447343aca937158ebddc44d9d0d07c0b3bc0db61b743e49f9ff634f799a90e8f72c0105ed3221fc3f11bdd7e05
|
7
|
+
data.tar.gz: 4a2e9e04aa39dfa2954988a20e825c1d306299084e120f3332c413e008353be45336819bc0127aa03e05aaaa47db6a90b78f1c4033ea95b665262035b0998ace
|
data/.dockerignore
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Git
|
2
|
+
.git
|
3
|
+
.github
|
4
|
+
.gitignore
|
5
|
+
|
6
|
+
# Development
|
7
|
+
spec/
|
8
|
+
coverage/
|
9
|
+
.rspec
|
10
|
+
.rubocop.yml
|
11
|
+
|
12
|
+
# Documentation
|
13
|
+
*.md
|
14
|
+
!README.md
|
15
|
+
CLAUDE.md
|
16
|
+
|
17
|
+
# Build artifacts
|
18
|
+
*.gem
|
19
|
+
pkg/
|
20
|
+
vendor/bundle
|
21
|
+
|
22
|
+
# IDE
|
23
|
+
.vscode/
|
24
|
+
.idea/
|
25
|
+
.claude/
|
26
|
+
*.swp
|
27
|
+
*.swo
|
28
|
+
*~
|
29
|
+
|
30
|
+
# Temp files
|
31
|
+
tmp/
|
32
|
+
*.log
|
33
|
+
.DS_Store
|
34
|
+
|
35
|
+
# Config examples
|
36
|
+
*.example
|
37
|
+
llms-txt.yml
|
38
|
+
config-output.txt
|
39
|
+
|
40
|
+
# CI
|
41
|
+
.github/workflows/
|
42
|
+
|
43
|
+
# Ruby
|
44
|
+
# Keep Gemfile.lock for reproducible builds
|
data/.github/workflows/ci.yml
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
concurrency:
|
4
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
5
|
+
cancel-in-progress: true
|
6
|
+
|
7
|
+
on:
|
8
|
+
pull_request:
|
9
|
+
branches: [ master ]
|
10
|
+
push:
|
11
|
+
branches: [ master ]
|
12
|
+
schedule:
|
13
|
+
- cron: '0 1 * * *'
|
14
|
+
|
15
|
+
permissions:
|
16
|
+
contents: read
|
17
|
+
|
18
|
+
jobs:
|
19
|
+
specs:
|
20
|
+
timeout-minutes: 15
|
21
|
+
runs-on: ubuntu-latest
|
22
|
+
strategy:
|
23
|
+
fail-fast: false
|
24
|
+
matrix:
|
25
|
+
ruby:
|
26
|
+
- '3.4'
|
27
|
+
- '3.3'
|
28
|
+
- '3.2'
|
29
|
+
include:
|
30
|
+
- ruby: '3.4'
|
31
|
+
coverage: 'true'
|
32
|
+
steps:
|
33
|
+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
|
34
|
+
with:
|
35
|
+
fetch-depth: 0
|
36
|
+
|
37
|
+
- name: Set up Ruby
|
38
|
+
uses: ruby/setup-ruby@v1
|
39
|
+
with:
|
40
|
+
ruby-version: ${{ matrix.ruby }}
|
41
|
+
bundler-cache: true
|
42
|
+
bundler: 'latest'
|
43
|
+
|
44
|
+
- name: Install latest bundler
|
45
|
+
run: |
|
46
|
+
gem install bundler --no-document
|
47
|
+
bundle config set without 'tools benchmarks docs'
|
48
|
+
|
49
|
+
- name: Bundle install
|
50
|
+
run: bundle install --jobs 4 --retry 3
|
51
|
+
|
52
|
+
- name: Run all tests
|
53
|
+
env:
|
54
|
+
GITHUB_COVERAGE: ${{ matrix.coverage }}
|
55
|
+
run: bin/rspecs
|
56
|
+
|
57
|
+
|
58
|
+
ci-success:
|
59
|
+
name: CI Success
|
60
|
+
runs-on: ubuntu-latest
|
61
|
+
if: always()
|
62
|
+
needs:
|
63
|
+
- specs
|
64
|
+
steps:
|
65
|
+
- name: Check all jobs passed
|
66
|
+
if: |
|
67
|
+
contains(needs.*.result, 'failure') ||
|
68
|
+
contains(needs.*.result, 'cancelled') ||
|
69
|
+
contains(needs.*.result, 'skipped')
|
70
|
+
run: exit 1
|
71
|
+
- run: echo "All CI checks passed!"
|
data/.github/workflows/docker.yml
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
name: Docker
|
2
|
+
|
3
|
+
concurrency:
|
4
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
5
|
+
cancel-in-progress: true
|
6
|
+
|
7
|
+
# Temporarily disabled - only runs on manual trigger
|
8
|
+
on:
|
9
|
+
workflow_dispatch:
|
10
|
+
|
11
|
+
# Automatic triggers disabled for now:
|
12
|
+
# push:
|
13
|
+
# branches:
|
14
|
+
# - master
|
15
|
+
# tags:
|
16
|
+
# - 'v*'
|
17
|
+
# pull_request:
|
18
|
+
# branches:
|
19
|
+
# - master
|
20
|
+
# schedule:
|
21
|
+
# # Rebuild weekly to get latest base image security updates
|
22
|
+
# - cron: '0 2 * * 0'
|
23
|
+
|
24
|
+
permissions:
|
25
|
+
contents: read
|
26
|
+
packages: write
|
27
|
+
|
28
|
+
jobs:
|
29
|
+
docker:
|
30
|
+
runs-on: ubuntu-latest
|
31
|
+
steps:
|
32
|
+
- name: Checkout
|
33
|
+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
|
34
|
+
with:
|
35
|
+
fetch-depth: 0
|
36
|
+
|
37
|
+
- name: Docker meta
|
38
|
+
id: meta
|
39
|
+
uses: docker/metadata-action@v5
|
40
|
+
with:
|
41
|
+
images: |
|
42
|
+
mensfeld/llm-docs-builder
|
43
|
+
ghcr.io/${{ github.repository }}
|
44
|
+
tags: |
|
45
|
+
type=ref,event=branch
|
46
|
+
type=ref,event=pr
|
47
|
+
type=semver,pattern={{version}}
|
48
|
+
type=semver,pattern={{major}}.{{minor}}
|
49
|
+
type=semver,pattern={{major}}
|
50
|
+
type=raw,value=latest,enable={{is_default_branch}}
|
51
|
+
|
52
|
+
- name: Set up QEMU
|
53
|
+
uses: docker/setup-qemu-action@v3
|
54
|
+
|
55
|
+
- name: Set up Docker Buildx
|
56
|
+
uses: docker/setup-buildx-action@v3
|
57
|
+
|
58
|
+
- name: Login to Docker Hub
|
59
|
+
if: github.event_name != 'pull_request'
|
60
|
+
uses: docker/login-action@v3
|
61
|
+
with:
|
62
|
+
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
63
|
+
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
64
|
+
|
65
|
+
- name: Login to GitHub Container Registry
|
66
|
+
if: github.event_name != 'pull_request'
|
67
|
+
uses: docker/login-action@v3
|
68
|
+
with:
|
69
|
+
registry: ghcr.io
|
70
|
+
username: ${{ github.actor }}
|
71
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
72
|
+
|
73
|
+
- name: Build and push
|
74
|
+
uses: docker/build-push-action@v5
|
75
|
+
with:
|
76
|
+
context: .
|
77
|
+
platforms: linux/amd64,linux/arm64
|
78
|
+
push: ${{ github.event_name != 'pull_request' }}
|
79
|
+
tags: ${{ steps.meta.outputs.tags }}
|
80
|
+
labels: ${{ steps.meta.outputs.labels }}
|
81
|
+
cache-from: type=gha
|
82
|
+
cache-to: type=gha,mode=max
|
83
|
+
|
84
|
+
- name: Test Docker image
|
85
|
+
run: |
|
86
|
+
docker run --rm ${{ fromJSON(steps.meta.outputs.json).tags[0] }} version
|
87
|
+
docker run --rm ${{ fromJSON(steps.meta.outputs.json).tags[0] }} --help
|
88
|
+
|
89
|
+
docker-success:
|
90
|
+
name: Docker Success
|
91
|
+
runs-on: ubuntu-latest
|
92
|
+
if: always()
|
93
|
+
needs:
|
94
|
+
- docker
|
95
|
+
steps:
|
96
|
+
- name: Check all jobs passed
|
97
|
+
if: |
|
98
|
+
contains(needs.*.result, 'failure') ||
|
99
|
+
contains(needs.*.result, 'cancelled') ||
|
100
|
+
contains(needs.*.result, 'skipped')
|
101
|
+
run: exit 1
|
102
|
+
- run: echo "Docker workflow completed successfully!"
|
data/.github/workflows/push.yml
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
name: Push Gem
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
tags:
|
6
|
+
- v*
|
7
|
+
|
8
|
+
permissions:
|
9
|
+
contents: read
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
push:
|
13
|
+
if: github.repository_owner == 'mensfeld'
|
14
|
+
runs-on: ubuntu-latest
|
15
|
+
environment: deployment
|
16
|
+
|
17
|
+
permissions:
|
18
|
+
contents: write
|
19
|
+
id-token: write
|
20
|
+
|
21
|
+
steps:
|
22
|
+
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
23
|
+
with:
|
24
|
+
fetch-depth: 0
|
25
|
+
|
26
|
+
- name: Set up Ruby
|
27
|
+
uses: ruby/setup-ruby@6797dcbb9a1889fd411d07e8aba7eded53fb8b48 # v1.264.0
|
28
|
+
with:
|
29
|
+
bundler-cache: false
|
30
|
+
|
31
|
+
- name: Bundle install
|
32
|
+
run: |
|
33
|
+
bundle install --jobs 4 --retry 3
|
34
|
+
|
35
|
+
- uses: rubygems/release-gem@a25424ba2ba8b387abc8ef40807c2c85b96cbe32 # v1.1.1
|
data/.gitignore
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
mise.toml
|
13
|
+
|
14
|
+
# Used by dotenv library to load environment variables.
|
15
|
+
.env
|
16
|
+
.env.*
|
17
|
+
|
18
|
+
# Ignore Byebug command history file.
|
19
|
+
.byebug_history
|
20
|
+
|
21
|
+
## Specific to RubyMotion:
|
22
|
+
.dat*
|
23
|
+
.repl_history
|
24
|
+
build/
|
25
|
+
*.bridgesupport
|
26
|
+
build-iPhoneOS/
|
27
|
+
build-iPhoneSimulator/
|
28
|
+
|
29
|
+
## Specific to RubyMotion (use of CocoaPods):
|
30
|
+
#
|
31
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
32
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
33
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
34
|
+
#
|
35
|
+
# vendor/Pods/
|
36
|
+
|
37
|
+
## Documentation cache and generated files:
|
38
|
+
/.yardoc/
|
39
|
+
/_yardoc/
|
40
|
+
/doc/
|
41
|
+
/rdoc/
|
42
|
+
|
43
|
+
## Environment normalization:
|
44
|
+
/.bundle/
|
45
|
+
/vendor/bundle
|
46
|
+
/lib/bundler/man/
|
47
|
+
|
48
|
+
# for a library or gem, you might want to ignore these files since the code is
|
49
|
+
# intended to run in multiple environments; otherwise, check them in:
|
50
|
+
# Gemfile.lock
|
51
|
+
# .ruby-version
|
52
|
+
# .ruby-gemset
|
53
|
+
|
54
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
55
|
+
.rvmrc
|
56
|
+
|
57
|
+
# Used by RuboCop. Remote config files pulled in from inherit_from directive.
|
58
|
+
# .rubocop-https?--*
|
59
|
+
|
60
|
+
# Project-specific generated files
|
61
|
+
llms.txt
|
62
|
+
*-output.txt
|
63
|
+
|
64
|
+
# Config files that might contain sensitive data
|
65
|
+
llms-txt.yml
|
66
|
+
.llms-txt.yml
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 3.2
|
3
|
+
NewCops: enable
|
4
|
+
SuggestExtensions: false
|
5
|
+
|
6
|
+
Style/Documentation:
|
7
|
+
Enabled: false
|
8
|
+
|
9
|
+
Style/StringLiterals:
|
10
|
+
EnforcedStyle: single_quotes
|
11
|
+
|
12
|
+
Layout/LineLength:
|
13
|
+
Max: 120
|
14
|
+
Exclude:
|
15
|
+
- 'lib/llms_txt/cli.rb'
|
16
|
+
|
17
|
+
Metrics/ClassLength:
|
18
|
+
Max: 200
|
19
|
+
Exclude:
|
20
|
+
- 'lib/llms_txt/cli.rb'
|
21
|
+
|
22
|
+
Metrics/MethodLength:
|
23
|
+
Max: 35
|
24
|
+
Exclude:
|
25
|
+
- 'lib/llms_txt/cli.rb'
|
26
|
+
|
27
|
+
Metrics/AbcSize:
|
28
|
+
Max: 40
|
29
|
+
Exclude:
|
30
|
+
- 'lib/llms_txt/cli.rb'
|
31
|
+
|
32
|
+
Metrics/CyclomaticComplexity:
|
33
|
+
Max: 15
|
34
|
+
Exclude:
|
35
|
+
- 'lib/llms_txt/config.rb'
|
36
|
+
|
37
|
+
Metrics/PerceivedComplexity:
|
38
|
+
Max: 15
|
39
|
+
Exclude:
|
40
|
+
- 'lib/llms_txt/config.rb'
|
41
|
+
|
42
|
+
Metrics/BlockLength:
|
43
|
+
Exclude:
|
44
|
+
- 'spec/**/*'
|
45
|
+
- 'lib/llms_txt/cli.rb'
|
46
|
+
- '*.gemspec'
|
47
|
+
|
48
|
+
Style/FrozenStringLiteralComment:
|
49
|
+
Enabled: true
|
50
|
+
|
51
|
+
# Specs often have multiline block chains
|
52
|
+
Style/MultilineBlockChain:
|
53
|
+
Exclude:
|
54
|
+
- 'spec/**/*'
|
55
|
+
|
56
|
+
# Disable predicate method naming rule
|
57
|
+
Naming/PredicateMethod:
|
58
|
+
Enabled: false
|
59
|
+
|
60
|
+
# Allow development dependencies in gemspec
|
61
|
+
Gemspec/DevelopmentDependencies:
|
62
|
+
Enabled: false
|
63
|
+
|
64
|
+
# Enforce first argument on new line for multiline method calls
|
65
|
+
Layout/FirstMethodArgumentLineBreak:
|
66
|
+
Enabled: true
|
67
|
+
|
68
|
+
# Use fixed indentation for arguments
|
69
|
+
Layout/ArgumentAlignment:
|
70
|
+
EnforcedStyle: with_fixed_indentation
|
71
|
+
|
72
|
+
# Ensure closing parenthesis on new line for multiline calls
|
73
|
+
Layout/MultilineMethodCallBraceLayout:
|
74
|
+
EnforcedStyle: new_line
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.4.7
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## Unreleased
|
4
|
+
- [Breaking] **Project renamed from `llms-txt-ruby` to `llm-docs-builder`** to better reflect expanded functionality beyond just llms.txt generation.
|
5
|
+
- Gem name: `llms-txt-ruby` → `llm-docs-builder`
|
6
|
+
- Module name: `LlmsTxt` → `LlmDocsBuilder`
|
7
|
+
- CLI command: `llms-txt` → `llm-docs-builder`
|
8
|
+
- Config file: `llms-txt.yml` → `llm-docs-builder.yml`
|
9
|
+
- Docker images: `mensfeld/llms-txt-ruby` → `mensfeld/llm-docs-builder`
|
10
|
+
- Repository: `llms-txt-ruby` → `llm-docs-builder`
|
11
|
+
- Updated all documentation, examples, and tests
|
12
|
+
- [Feature] Added Docker support for easy CLI usage without Ruby installation.
|
13
|
+
- Multi-stage Dockerfile for minimal image size (~78MB)
|
14
|
+
- Multi-architecture support (linux/amd64, linux/arm64)
|
15
|
+
- Published to Docker Hub (`mensfeld/llm-docs-builder`) and GitHub Container Registry
|
16
|
+
- GitHub Actions workflow for automated Docker builds and publishing
|
17
|
+
- Comprehensive Docker usage documentation with examples for all commands
|
18
|
+
- CI/CD integration examples (GitHub Actions, GitLab CI, Jenkins)
|
19
|
+
- [Feature] Added `compare` command to measure context window savings by comparing content sizes between human and AI versions.
|
20
|
+
- Compare remote URL with different User-Agents (human browser vs AI bot)
|
21
|
+
- Compare remote URL with local markdown file
|
22
|
+
- Display reduction percentage, bytes saved, and compression factor
|
23
|
+
- Support for custom User-Agents and verbose output
|
24
|
+
- [Enhancement] Added `Comparator` class with comprehensive specs for HTTP fetching and size comparison.
|
25
|
+
- [Enhancement] Added `-u/--url` and `-f/--file` CLI flags for compare command.
|
26
|
+
- [Security] Added redirect depth limiting (MAX_REDIRECTS = 10) to prevent infinite redirect loops.
|
27
|
+
- [Security] Added URL validation to reject non-HTTP/HTTPS schemes (prevents file://, javascript:, ftp://, etc.).
|
28
|
+
- [Security] Added URL format validation to ensure proper host and scheme presence.
|
29
|
+
- [Enhancement] Added verbose redirect logging to show redirect chains when --verbose flag is used.
|
30
|
+
|
31
|
+
## 0.2.0 (2025-10-07)
|
32
|
+
- [Breaking] Removed positional argument support for all CLI commands. All file paths must now be specified using flags:
|
33
|
+
- `transform`: use `-d/--docs` flag instead of positional argument
|
34
|
+
- `parse`: use `-d/--docs` flag instead of positional argument (defaults to `llms.txt` if not specified)
|
35
|
+
- `validate`: use `-d/--docs` flag instead of positional argument (defaults to `llms.txt` if not specified)
|
36
|
+
- [Enhancement] Improved CLI consistency by requiring explicit flags for all file paths.
|
37
|
+
- [Enhancement] Added comprehensive CLI integration tests in `spec/integrations/` directory.
|
38
|
+
- Each command has its own dedicated integration test file
|
39
|
+
- Tests verify actual CLI binary execution, not just Ruby API
|
40
|
+
- All tests (unit and integration) run together with `bin/rspecs`
|
41
|
+
- [Enhancement] Added convenient test runner script `bin/rspecs` for running all tests.
|
42
|
+
- [Enhancement] Added comprehensive YARD documentation to all CLI methods.
|
43
|
+
- [Enhancement] Resolved all RuboCop offenses (0 offenses detected).
|
44
|
+
- [Fix] Fixed validator bug where `each_value` was incorrectly called on Array.
|
45
|
+
|
46
|
+
## 0.1.3 (2025-10-07)
|
47
|
+
- [Fix] Fixed `transform` command to accept file path from `-d/--docs` flag in addition to positional arguments.
|
48
|
+
|
49
|
+
## 0.1.2 (2025-10-07)
|
50
|
+
- [Fix] Fixed CLI error handling to use correct `LlmsTxt::Errors::BaseError` instead of non-existent `LlmsTxt::Error`.
|
51
|
+
- [Enhancement] Extracted CLI class to `lib/llms_txt/cli.rb` for better testability.
|
52
|
+
- [Enhancement] Added comprehensive CLI error handling specs.
|
53
|
+
|
54
|
+
## 0.1.1 (2025-10-07)
|
55
|
+
- [Change] Updated repository metadata to use `master` branch instead of `main`.
|
56
|
+
|
57
|
+
## 0.1.0 (2025-10-07)
|
58
|
+
- [Feature] Generate `llms.txt` files from markdown documentation.
|
59
|
+
- [Feature] Transform individual markdown files to be AI-friendly.
|
60
|
+
- [Feature] Bulk transformation of entire documentation directories.
|
61
|
+
- [Feature] CLI with commands: `generate`, `transform`, `bulk-transform`, `parse`, `validate`.
|
62
|
+
- [Feature] Configuration file support (`llms-txt.yml`).
|
63
|
+
- [Feature] Automatic link expansion from relative to absolute URLs.
|
64
|
+
- [Feature] File prioritization (README first, then guides, APIs, etc.).
|
65
|
+
- [Feature] Exclusion patterns for bulk transformations.
|
66
|
+
- [Feature] Ruby API for programmatic usage.
|
data/CLAUDE.md
ADDED
@@ -0,0 +1,178 @@
|
|
1
|
+
# CLAUDE.md
|
2
|
+
|
3
|
+
This file provides guidance to AI coding assistants (such as Claude Code) when working with code in this repository.
|
4
|
+
|
5
|
+
## Project Overview
|
6
|
+
|
7
|
+
llm-docs-builder is a Ruby gem that generates [llms.txt](https://llmstxt.org/) files from existing markdown documentation and transforms markdown files to be AI-friendly. It provides both a CLI tool and Ruby API.
|
8
|
+
|
9
|
+
**Key functionality:**
|
10
|
+
- Generates llms.txt files from documentation directories by scanning markdown files, extracting metadata, and organizing by priority
|
11
|
+
- Transforms individual markdown files by expanding relative links to absolute URLs
|
12
|
+
- Bulk transforms entire documentation trees with customizable suffixes and exclusion patterns
|
13
|
+
- Supports both config file and direct options for all operations
|
14
|
+
|
15
|
+
## Development Commands
|
16
|
+
|
17
|
+
### Testing
|
18
|
+
```bash
|
19
|
+
# Run all tests
|
20
|
+
./bin/rspecs
|
21
|
+
|
22
|
+
# Run specific test file
|
23
|
+
bundle exec rspec spec/llm_docs_builder_spec.rb
|
24
|
+
|
25
|
+
# Run specific test line
|
26
|
+
bundle exec rspec spec/llm_docs_builder_spec.rb:42
|
27
|
+
```
|
28
|
+
|
29
|
+
### Code Quality
|
30
|
+
```bash
|
31
|
+
# Run RuboCop linter
|
32
|
+
bundle exec rubocop
|
33
|
+
|
34
|
+
# Auto-fix RuboCop violations
|
35
|
+
bundle exec rubocop -a
|
36
|
+
|
37
|
+
# Run all checks (tests + linting)
|
38
|
+
bundle exec rake
|
39
|
+
```
|
40
|
+
|
41
|
+
### CLI Testing
|
42
|
+
```bash
|
43
|
+
# Test CLI locally
|
44
|
+
bundle exec bin/llm-docs-builder generate --docs ./docs
|
45
|
+
bundle exec bin/llm-docs-builder transform --docs README.md
|
46
|
+
bundle exec bin/llm-docs-builder bulk-transform --docs ./docs
|
47
|
+
|
48
|
+
# Test compare command (requires network)
|
49
|
+
bundle exec bin/llm-docs-builder compare --url https://karafka.io/docs/Getting-Started.html
|
50
|
+
bundle exec bin/llm-docs-builder compare --url https://example.com/page.html --file docs/local.md
|
51
|
+
```
|
52
|
+
|
53
|
+
### Building and Installing
|
54
|
+
```bash
|
55
|
+
# Build gem locally
|
56
|
+
bundle exec rake build
|
57
|
+
|
58
|
+
# Install locally built gem
|
59
|
+
gem install pkg/llm-docs-builder-*.gem
|
60
|
+
|
61
|
+
# Release (maintainers only)
|
62
|
+
bundle exec rake release
|
63
|
+
```
|
64
|
+
|
65
|
+
## Architecture
|
66
|
+
|
67
|
+
### Core Components
|
68
|
+
|
69
|
+
**LlmDocsBuilder Module** (`lib/llm_docs_builder.rb`)
|
70
|
+
- Main API entry point with class methods for all operations
|
71
|
+
- Uses Zeitwerk for autoloading
|
72
|
+
- Delegates to specialized classes for generation, transformation, and validation
|
73
|
+
- All methods support both config file and direct options via `Config#merge_with_options`
|
74
|
+
|
75
|
+
**Generator** (`lib/llm_docs_builder/generator.rb`)
|
76
|
+
- Scans documentation directories recursively using `Find.find`
|
77
|
+
- Extracts title from first H1 header, description from first paragraph
|
78
|
+
- Prioritizes files: README (1), getting started (2), guides (3), tutorials (4), API (5), reference (6), others (7)
|
79
|
+
- Builds formatted llms.txt with links and descriptions
|
80
|
+
|
81
|
+
**MarkdownTransformer** (`lib/llm_docs_builder/markdown_transformer.rb`)
|
82
|
+
- Transforms individual markdown files using regex patterns
|
83
|
+
- `expand_relative_links`: Converts relative links to absolute URLs using base_url
|
84
|
+
- `convert_html_urls`: Changes .html/.htm URLs to .md format
|
85
|
+
- Leaves absolute URLs and anchor links unchanged
|
86
|
+
|
87
|
+
**BulkTransformer** (`lib/llm_docs_builder/bulk_transformer.rb`)
|
88
|
+
- Recursively processes all markdown files in a directory
|
89
|
+
- Uses `MarkdownTransformer` for each file
|
90
|
+
- Generates output paths with configurable suffix (default: `.llm`)
|
91
|
+
- Empty suffix (`""`) enables in-place transformation
|
92
|
+
- Supports glob-based exclusion patterns via `File.fnmatch`
|
93
|
+
|
94
|
+
**Comparator** (`lib/llm_docs_builder/comparator.rb`)
|
95
|
+
- Measures context window savings by comparing content sizes
|
96
|
+
- Fetches URLs with different User-Agents (human browser vs AI bot)
|
97
|
+
- Can compare remote URL with local markdown file
|
98
|
+
- Uses Net::HTTP for fetching with redirect support
|
99
|
+
- Calculates reduction percentage, bytes saved, and compression factor
|
100
|
+
|
101
|
+
**Config** (`lib/llm_docs_builder/config.rb`)
|
102
|
+
- Loads YAML config from file or auto-finds `llm-docs-builder.yml`
|
103
|
+
- Merges config file options with programmatic options (programmatic takes precedence)
|
104
|
+
- Handles defaults: `suffix: '.llm'`, `output: 'llms.txt'`, `excludes: []`
|
105
|
+
|
106
|
+
**CLI** (`lib/llm_docs_builder/cli.rb`)
|
107
|
+
- Parses commands: generate, transform, bulk-transform, compare, parse, validate, version
|
108
|
+
- Uses OptionParser for flag parsing
|
109
|
+
- Loads config and merges with CLI options before delegating to main module
|
110
|
+
- Handles errors gracefully with user-friendly messages
|
111
|
+
- Compare command displays formatted output with human-readable byte sizes (bytes/KB/MB)
|
112
|
+
|
113
|
+
### Configuration Precedence
|
114
|
+
|
115
|
+
Options are resolved in this order (highest to lowest priority):
|
116
|
+
1. Direct method arguments (e.g., `LlmDocsBuilder.generate_from_docs('./docs', title: 'Override')`)
|
117
|
+
2. CLI flags (e.g., `--docs ./docs`)
|
118
|
+
3. Config file values (e.g., `llm-docs-builder.yml`)
|
119
|
+
4. Defaults (e.g., `suffix: '.llm'`, `output: 'llms.txt'`)
|
120
|
+
|
121
|
+
### File Priority System
|
122
|
+
|
123
|
+
When generating llms.txt, files are automatically ordered by importance:
|
124
|
+
- Priority 1: README files (always listed first)
|
125
|
+
- Priority 2: Getting started guides
|
126
|
+
- Priority 3: General guides
|
127
|
+
- Priority 4: Tutorials
|
128
|
+
- Priority 5: API documentation
|
129
|
+
- Priority 6: Reference documentation
|
130
|
+
- Priority 7: All other files
|
131
|
+
|
132
|
+
### Link Transformation Logic
|
133
|
+
|
134
|
+
**Relative Link Expansion** (when `base_url` provided):
|
135
|
+
- Converts `[text](./path.md)` → `[text](https://base.url/path.md)`
|
136
|
+
- Converts `[text](../other.md)` → `[text](https://base.url/other.md)`
|
137
|
+
- Skips URLs starting with `http://`, `https://`, `//`, or `#`
|
138
|
+
|
139
|
+
**URL Conversion** (when `convert_urls: true`):
|
140
|
+
- Changes `https://example.com/page.html` → `https://example.com/page.md`
|
141
|
+
- Changes `https://example.com/doc.htm` → `https://example.com/doc.md`
|
142
|
+
|
143
|
+
### In-Place vs Separate Files
|
144
|
+
|
145
|
+
**Separate Files** (`suffix: '.llm'` - default):
|
146
|
+
- Creates new files: `README.md` → `README.llm.md`
|
147
|
+
- Preserves originals for human-readable documentation
|
148
|
+
- Useful for dual-serving human and AI versions
|
149
|
+
|
150
|
+
**In-Place** (`suffix: ""`):
|
151
|
+
- Overwrites originals: `README.md` → `README.md` (transformed)
|
152
|
+
- Used in build pipelines (e.g., Karafka framework)
|
153
|
+
- Transforms documentation before deployment
|
154
|
+
|
155
|
+
## Testing Strategy
|
156
|
+
|
157
|
+
- RSpec for all tests with SimpleCov coverage tracking
|
158
|
+
- Unit tests for each component in isolation
|
159
|
+
- Integration tests in `spec/integrations/` for end-to-end workflows
|
160
|
+
- Example outputs saved in `spec/examples.txt` for persistence
|
161
|
+
- CI tests against Ruby 3.2, 3.3, 3.4 via GitHub Actions
|
162
|
+
|
163
|
+
## Dependencies
|
164
|
+
|
165
|
+
- **zeitwerk**: Autoloading and code organization
|
166
|
+
- **optparse**: Built-in Ruby CLI parsing (no external CLI framework)
|
167
|
+
- **rspec**: Testing framework
|
168
|
+
- **rubocop**: Code linting and style enforcement
|
169
|
+
- **simplecov**: Test coverage reporting
|
170
|
+
|
171
|
+
## Code Style
|
172
|
+
|
173
|
+
- Ruby 3.2+ syntax and features required
|
174
|
+
- Frozen string literals in all files
|
175
|
+
- Explicit module nesting (no `class Foo::Bar`)
|
176
|
+
- Comprehensive YARD documentation for public APIs
|
177
|
+
- Private methods clearly marked and documented
|
178
|
+
- RuboCop enforces consistent style
|