llmstxt-standalone 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/PKG-INFO +68 -8
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/README.md +65 -7
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/pyproject.toml +3 -1
- llmstxt_standalone-0.2.0/src/llmstxt_standalone/cli.py +422 -0
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/derive.py +5 -2
- llmstxt_standalone-0.2.0/src/llmstxt_standalone/config/load.py +172 -0
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/model.py +3 -3
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/convert.py +23 -10
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/generate.py +33 -4
- llmstxt_standalone-0.1.0/src/llmstxt_standalone/cli.py +0 -172
- llmstxt_standalone-0.1.0/src/llmstxt_standalone/config/load.py +0 -93
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/__init__.py +0 -0
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/__main__.py +0 -0
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/__init__.py +0 -0
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/plugin.py +0 -0
- {llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: llmstxt-standalone
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Generate llms.txt from built HTML documentation
|
|
5
5
|
Keywords: llms,documentation,markdown,mkdocs
|
|
6
6
|
Author: Shaan Majid
|
|
@@ -20,10 +20,12 @@ Classifier: Topic :: Documentation
|
|
|
20
20
|
Classifier: Typing :: Typed
|
|
21
21
|
Requires-Dist: typer>=0.9.0
|
|
22
22
|
Requires-Dist: pyyaml>=6.0
|
|
23
|
+
Requires-Dist: ruamel-yaml>=0.18
|
|
23
24
|
Requires-Dist: beautifulsoup4>=4.12
|
|
24
25
|
Requires-Dist: markdownify>=0.14,<2.0
|
|
25
26
|
Requires-Dist: mdformat>=0.7,<2.0
|
|
26
27
|
Requires-Dist: mdformat-tables>=1.0
|
|
28
|
+
Requires-Dist: pydantic>=2.12.5
|
|
27
29
|
Requires-Python: >=3.10
|
|
28
30
|
Project-URL: Repository, https://github.com/shaanmajid/llmstxt-standalone
|
|
29
31
|
Project-URL: Issues, https://github.com/shaanmajid/llmstxt-standalone/issues
|
|
@@ -59,21 +61,25 @@ uv add llmstxt-standalone # or: pip install
|
|
|
59
61
|
|
|
60
62
|
## Usage
|
|
61
63
|
|
|
64
|
+
### build
|
|
65
|
+
|
|
66
|
+
Generate llms.txt from a built MkDocs site:
|
|
67
|
+
|
|
62
68
|
```bash
|
|
63
69
|
# Run from project root (expects mkdocs.yml and site/)
|
|
64
|
-
llmstxt-standalone
|
|
70
|
+
llmstxt-standalone build
|
|
65
71
|
|
|
66
72
|
# Explicit paths
|
|
67
|
-
llmstxt-standalone --config mkdocs.yml --site-dir ./build --output-dir ./dist
|
|
73
|
+
llmstxt-standalone build --config mkdocs.yml --site-dir ./build --output-dir ./dist
|
|
68
74
|
|
|
69
75
|
# Preview without writing files
|
|
70
|
-
llmstxt-standalone --dry-run
|
|
76
|
+
llmstxt-standalone build --dry-run
|
|
71
77
|
|
|
72
78
|
# Suppress output
|
|
73
|
-
llmstxt-standalone --quiet
|
|
79
|
+
llmstxt-standalone build --quiet
|
|
74
80
|
|
|
75
81
|
# Show detailed progress
|
|
76
|
-
llmstxt-standalone --verbose
|
|
82
|
+
llmstxt-standalone build --verbose
|
|
77
83
|
```
|
|
78
84
|
|
|
79
85
|
| Option | Short | Default | Description |
|
|
@@ -84,11 +90,65 @@ llmstxt-standalone --verbose
|
|
|
84
90
|
| `--dry-run` | `-n` | | Preview without writing |
|
|
85
91
|
| `--quiet` | `-q` | | Suppress output |
|
|
86
92
|
| `--verbose` | `-v` | | Show detailed progress |
|
|
87
|
-
|
|
93
|
+
|
|
94
|
+
### init
|
|
95
|
+
|
|
96
|
+
Add llmstxt plugin configuration to an existing mkdocs.yml:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
llmstxt-standalone init
|
|
100
|
+
|
|
101
|
+
# Specify config path
|
|
102
|
+
llmstxt-standalone init --config path/to/mkdocs.yml
|
|
103
|
+
|
|
104
|
+
# Overwrite existing llmstxt config
|
|
105
|
+
llmstxt-standalone init --force
|
|
106
|
+
|
|
107
|
+
# Show detailed progress
|
|
108
|
+
llmstxt-standalone init --verbose
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
| Option | Short | Description |
|
|
112
|
+
|--------|-------|-------------|
|
|
113
|
+
| `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
|
|
114
|
+
| `--force` | `-f` | Overwrite existing llmstxt section |
|
|
115
|
+
| `--quiet` | `-q` | Suppress output |
|
|
116
|
+
| `--verbose` | `-v` | Show detailed progress |
|
|
117
|
+
|
|
118
|
+
### validate
|
|
119
|
+
|
|
120
|
+
Check that a config file is valid:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
$ llmstxt-standalone validate
|
|
124
|
+
Config valid: mkdocs.yml
|
|
125
|
+
Site: My Project
|
|
126
|
+
Sections: 3
|
|
127
|
+
Pages: 12
|
|
128
|
+
|
|
129
|
+
# Exit code only (for scripts)
|
|
130
|
+
llmstxt-standalone validate --quiet
|
|
131
|
+
|
|
132
|
+
# Show section details
|
|
133
|
+
llmstxt-standalone validate --verbose
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
| Option | Short | Description |
|
|
137
|
+
|--------|-------|-------------|
|
|
138
|
+
| `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
|
|
139
|
+
| `--quiet` | `-q` | Suppress output |
|
|
140
|
+
| `--verbose` | `-v` | Show detailed config information |
|
|
141
|
+
|
|
142
|
+
### Global options
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
llmstxt-standalone --version # Show version
|
|
146
|
+
llmstxt-standalone --help # Show available commands
|
|
147
|
+
```
|
|
88
148
|
|
|
89
149
|
## Output
|
|
90
150
|
|
|
91
|
-
The
|
|
151
|
+
The `build` command generates three outputs:
|
|
92
152
|
|
|
93
153
|
1. `llms.txt` — an index file with markdown links to all pages
|
|
94
154
|
1. `llms-full.txt` — concatenated content of all pages
|
|
@@ -28,21 +28,25 @@ uv add llmstxt-standalone # or: pip install
|
|
|
28
28
|
|
|
29
29
|
## Usage
|
|
30
30
|
|
|
31
|
+
### build
|
|
32
|
+
|
|
33
|
+
Generate llms.txt from a built MkDocs site:
|
|
34
|
+
|
|
31
35
|
```bash
|
|
32
36
|
# Run from project root (expects mkdocs.yml and site/)
|
|
33
|
-
llmstxt-standalone
|
|
37
|
+
llmstxt-standalone build
|
|
34
38
|
|
|
35
39
|
# Explicit paths
|
|
36
|
-
llmstxt-standalone --config mkdocs.yml --site-dir ./build --output-dir ./dist
|
|
40
|
+
llmstxt-standalone build --config mkdocs.yml --site-dir ./build --output-dir ./dist
|
|
37
41
|
|
|
38
42
|
# Preview without writing files
|
|
39
|
-
llmstxt-standalone --dry-run
|
|
43
|
+
llmstxt-standalone build --dry-run
|
|
40
44
|
|
|
41
45
|
# Suppress output
|
|
42
|
-
llmstxt-standalone --quiet
|
|
46
|
+
llmstxt-standalone build --quiet
|
|
43
47
|
|
|
44
48
|
# Show detailed progress
|
|
45
|
-
llmstxt-standalone --verbose
|
|
49
|
+
llmstxt-standalone build --verbose
|
|
46
50
|
```
|
|
47
51
|
|
|
48
52
|
| Option | Short | Default | Description |
|
|
@@ -53,11 +57,65 @@ llmstxt-standalone --verbose
|
|
|
53
57
|
| `--dry-run` | `-n` | | Preview without writing |
|
|
54
58
|
| `--quiet` | `-q` | | Suppress output |
|
|
55
59
|
| `--verbose` | `-v` | | Show detailed progress |
|
|
56
|
-
|
|
60
|
+
|
|
61
|
+
### init
|
|
62
|
+
|
|
63
|
+
Add llmstxt plugin configuration to an existing mkdocs.yml:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
llmstxt-standalone init
|
|
67
|
+
|
|
68
|
+
# Specify config path
|
|
69
|
+
llmstxt-standalone init --config path/to/mkdocs.yml
|
|
70
|
+
|
|
71
|
+
# Overwrite existing llmstxt config
|
|
72
|
+
llmstxt-standalone init --force
|
|
73
|
+
|
|
74
|
+
# Show detailed progress
|
|
75
|
+
llmstxt-standalone init --verbose
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
| Option | Short | Description |
|
|
79
|
+
|--------|-------|-------------|
|
|
80
|
+
| `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
|
|
81
|
+
| `--force` | `-f` | Overwrite existing llmstxt section |
|
|
82
|
+
| `--quiet` | `-q` | Suppress output |
|
|
83
|
+
| `--verbose` | `-v` | Show detailed progress |
|
|
84
|
+
|
|
85
|
+
### validate
|
|
86
|
+
|
|
87
|
+
Check that a config file is valid:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
$ llmstxt-standalone validate
|
|
91
|
+
Config valid: mkdocs.yml
|
|
92
|
+
Site: My Project
|
|
93
|
+
Sections: 3
|
|
94
|
+
Pages: 12
|
|
95
|
+
|
|
96
|
+
# Exit code only (for scripts)
|
|
97
|
+
llmstxt-standalone validate --quiet
|
|
98
|
+
|
|
99
|
+
# Show section details
|
|
100
|
+
llmstxt-standalone validate --verbose
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
| Option | Short | Description |
|
|
104
|
+
|--------|-------|-------------|
|
|
105
|
+
| `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
|
|
106
|
+
| `--quiet` | `-q` | Suppress output |
|
|
107
|
+
| `--verbose` | `-v` | Show detailed config information |
|
|
108
|
+
|
|
109
|
+
### Global options
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
llmstxt-standalone --version # Show version
|
|
113
|
+
llmstxt-standalone --help # Show available commands
|
|
114
|
+
```
|
|
57
115
|
|
|
58
116
|
## Output
|
|
59
117
|
|
|
60
|
-
The
|
|
118
|
+
The `build` command generates three outputs:
|
|
61
119
|
|
|
62
120
|
1. `llms.txt` — an index file with markdown links to all pages
|
|
63
121
|
1. `llms-full.txt` — concatenated content of all pages
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "llmstxt-standalone"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Generate llms.txt from built HTML documentation"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -26,10 +26,12 @@ authors = [{ name = "Shaan Majid", email = "shaanmajid64@gmail.com" }]
|
|
|
26
26
|
dependencies = [
|
|
27
27
|
"typer>=0.9.0",
|
|
28
28
|
"pyyaml>=6.0",
|
|
29
|
+
"ruamel.yaml>=0.18",
|
|
29
30
|
"beautifulsoup4>=4.12",
|
|
30
31
|
"markdownify>=0.14,<2.0",
|
|
31
32
|
"mdformat>=0.7,<2.0",
|
|
32
33
|
"mdformat-tables>=1.0",
|
|
34
|
+
"pydantic>=2.12.5",
|
|
33
35
|
]
|
|
34
36
|
|
|
35
37
|
[project.scripts]
|
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"""Command-line interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
import yaml
|
|
11
|
+
from ruamel.yaml import YAML
|
|
12
|
+
from ruamel.yaml import YAMLError as RuamelYAMLError
|
|
13
|
+
|
|
14
|
+
from llmstxt_standalone import __version__
|
|
15
|
+
from llmstxt_standalone.config import load_config
|
|
16
|
+
from llmstxt_standalone.generate import (
|
|
17
|
+
build_llms_output,
|
|
18
|
+
ensure_safe_md_path,
|
|
19
|
+
write_markdown_files,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _make_logger(
|
|
24
|
+
quiet: bool, verbose: bool = False
|
|
25
|
+
) -> tuple[Callable[..., None], Callable[..., None]]:
|
|
26
|
+
"""Create log and log_verbose functions for CLI output.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
quiet: If True, suppress all output.
|
|
30
|
+
verbose: If True, enable verbose logging (quiet overrides this).
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
Tuple of (log, log_verbose) functions.
|
|
34
|
+
"""
|
|
35
|
+
effective_verbose = verbose and not quiet
|
|
36
|
+
|
|
37
|
+
def log(msg: str, color: str = "green", err: bool = False) -> None:
|
|
38
|
+
if not quiet:
|
|
39
|
+
typer.secho(msg, fg=color, err=err)
|
|
40
|
+
|
|
41
|
+
def log_verbose(msg: str, color: str = "green", err: bool = False) -> None:
|
|
42
|
+
if effective_verbose:
|
|
43
|
+
typer.secho(msg, fg=color, err=err)
|
|
44
|
+
|
|
45
|
+
return log, log_verbose
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def version_callback(value: bool) -> None:
|
|
49
|
+
"""Print version and exit if --version flag is set."""
|
|
50
|
+
if value:
|
|
51
|
+
typer.echo(f"llmstxt-standalone {__version__}")
|
|
52
|
+
raise typer.Exit()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
app = typer.Typer(
|
|
56
|
+
help="Generate llms.txt from built HTML documentation.",
|
|
57
|
+
no_args_is_help=True,
|
|
58
|
+
context_settings={"help_option_names": ["-h", "--help"]},
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@app.callback(invoke_without_command=True)
|
|
63
|
+
def main(
|
|
64
|
+
version: Annotated[
|
|
65
|
+
bool,
|
|
66
|
+
typer.Option(
|
|
67
|
+
"--version",
|
|
68
|
+
"-V",
|
|
69
|
+
callback=version_callback,
|
|
70
|
+
is_eager=True,
|
|
71
|
+
help="Show version and exit",
|
|
72
|
+
),
|
|
73
|
+
] = False,
|
|
74
|
+
) -> None:
|
|
75
|
+
"""Generate llms.txt from built HTML documentation."""
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.command()
|
|
79
|
+
def build(
|
|
80
|
+
config: Annotated[
|
|
81
|
+
Path,
|
|
82
|
+
typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
|
|
83
|
+
] = Path("mkdocs.yml"),
|
|
84
|
+
site_dir: Annotated[
|
|
85
|
+
Path,
|
|
86
|
+
typer.Option("--site-dir", "-s", help="Path to built HTML site directory"),
|
|
87
|
+
] = Path("site"),
|
|
88
|
+
output_dir: Annotated[
|
|
89
|
+
Path | None,
|
|
90
|
+
typer.Option(
|
|
91
|
+
"--output-dir", "-o", help="Output directory (defaults to site-dir)"
|
|
92
|
+
),
|
|
93
|
+
] = None,
|
|
94
|
+
dry_run: Annotated[
|
|
95
|
+
bool,
|
|
96
|
+
typer.Option(
|
|
97
|
+
"--dry-run",
|
|
98
|
+
"-n",
|
|
99
|
+
help="Preview what would be generated without writing files",
|
|
100
|
+
),
|
|
101
|
+
] = False,
|
|
102
|
+
quiet: Annotated[
|
|
103
|
+
bool,
|
|
104
|
+
typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
|
|
105
|
+
] = False,
|
|
106
|
+
verbose: Annotated[
|
|
107
|
+
bool,
|
|
108
|
+
typer.Option("--verbose", "-v", help="Show detailed progress"),
|
|
109
|
+
] = False,
|
|
110
|
+
) -> None:
|
|
111
|
+
"""Generate llms.txt and llms-full.txt from built MkDocs site."""
|
|
112
|
+
# Resolve output directory
|
|
113
|
+
out_dir = output_dir or site_dir
|
|
114
|
+
log, log_verbose = _make_logger(quiet, verbose)
|
|
115
|
+
|
|
116
|
+
# Validate inputs
|
|
117
|
+
if not config.exists():
|
|
118
|
+
log(f"Error: Config file not found: {config}", color="red", err=True)
|
|
119
|
+
raise typer.Exit(1)
|
|
120
|
+
|
|
121
|
+
if not site_dir.exists():
|
|
122
|
+
log(f"Error: Site directory not found: {site_dir}", color="red", err=True)
|
|
123
|
+
log(
|
|
124
|
+
"Hint: Run 'mkdocs build' first to generate the HTML documentation.",
|
|
125
|
+
color="yellow",
|
|
126
|
+
err=True,
|
|
127
|
+
)
|
|
128
|
+
raise typer.Exit(1)
|
|
129
|
+
|
|
130
|
+
# Load config
|
|
131
|
+
try:
|
|
132
|
+
cfg = load_config(config)
|
|
133
|
+
except (FileNotFoundError, ValueError, yaml.YAMLError) as e:
|
|
134
|
+
log(f"Error loading config: {e}", color="red", err=True)
|
|
135
|
+
raise typer.Exit(1) from None
|
|
136
|
+
|
|
137
|
+
# Validate sections
|
|
138
|
+
if not cfg.sections:
|
|
139
|
+
log("Error: No sections configured.", color="red", err=True)
|
|
140
|
+
log(
|
|
141
|
+
"Add a 'nav' to your mkdocs.yml, or configure 'sections' "
|
|
142
|
+
"in the llmstxt plugin.",
|
|
143
|
+
color="yellow",
|
|
144
|
+
err=True,
|
|
145
|
+
)
|
|
146
|
+
raise typer.Exit(1)
|
|
147
|
+
|
|
148
|
+
log_verbose(f"Site: {cfg.site_name}")
|
|
149
|
+
log_verbose(f"Sections: {list(cfg.sections.keys())}")
|
|
150
|
+
if dry_run:
|
|
151
|
+
log_verbose("Dry run - no files will be written")
|
|
152
|
+
|
|
153
|
+
# Generate content
|
|
154
|
+
llms_build = build_llms_output(
|
|
155
|
+
config=cfg,
|
|
156
|
+
site_dir=site_dir,
|
|
157
|
+
)
|
|
158
|
+
try:
|
|
159
|
+
markdown_files = write_markdown_files(
|
|
160
|
+
llms_build.pages,
|
|
161
|
+
output_dir=out_dir,
|
|
162
|
+
use_directory_urls=cfg.use_directory_urls,
|
|
163
|
+
dry_run=dry_run,
|
|
164
|
+
)
|
|
165
|
+
except (OSError, ValueError) as exc:
|
|
166
|
+
log(f"Error writing markdown files: {exc}", color="red", err=True)
|
|
167
|
+
raise typer.Exit(1) from None
|
|
168
|
+
|
|
169
|
+
# Define output paths
|
|
170
|
+
llms_path = out_dir / "llms.txt"
|
|
171
|
+
try:
|
|
172
|
+
full_output_path = ensure_safe_md_path(cfg.full_output)
|
|
173
|
+
except ValueError:
|
|
174
|
+
log(
|
|
175
|
+
"Error: Invalid full_output: must be a relative path without '..'",
|
|
176
|
+
color="red",
|
|
177
|
+
err=True,
|
|
178
|
+
)
|
|
179
|
+
raise typer.Exit(1) from None
|
|
180
|
+
full_path = out_dir / full_output_path
|
|
181
|
+
|
|
182
|
+
# Write output files (skip in dry-run mode)
|
|
183
|
+
if dry_run:
|
|
184
|
+
action = "Would generate"
|
|
185
|
+
color = "yellow"
|
|
186
|
+
else:
|
|
187
|
+
action = "Generated"
|
|
188
|
+
color = "green"
|
|
189
|
+
try:
|
|
190
|
+
out_dir.mkdir(parents=True, exist_ok=True)
|
|
191
|
+
llms_path.write_text(llms_build.llms_txt, encoding="utf-8")
|
|
192
|
+
full_path.write_text(llms_build.llms_full_txt, encoding="utf-8")
|
|
193
|
+
except OSError as exc:
|
|
194
|
+
log(f"Error writing output files: {exc}", color="red", err=True)
|
|
195
|
+
raise typer.Exit(1) from None
|
|
196
|
+
|
|
197
|
+
log(f"{action} {llms_path} ({len(llms_build.llms_txt):,} bytes)", color)
|
|
198
|
+
log(f"{action} {full_path} ({len(llms_build.llms_full_txt):,} bytes)", color)
|
|
199
|
+
log(f"{action} {len(markdown_files)} markdown files", color)
|
|
200
|
+
|
|
201
|
+
if llms_build.skipped:
|
|
202
|
+
log_verbose("Skipped files:", color="yellow", err=True)
|
|
203
|
+
for path, reason in llms_build.skipped:
|
|
204
|
+
log_verbose(f"- {path} ({reason})", color="yellow", err=True)
|
|
205
|
+
|
|
206
|
+
if llms_build.warnings:
|
|
207
|
+
log("Warnings:", color="yellow", err=True)
|
|
208
|
+
for warning in llms_build.warnings:
|
|
209
|
+
log(f"- {warning}", color="yellow", err=True)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
@app.command()
|
|
213
|
+
def init(
|
|
214
|
+
config: Annotated[
|
|
215
|
+
Path,
|
|
216
|
+
typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
|
|
217
|
+
] = Path("mkdocs.yml"),
|
|
218
|
+
force: Annotated[
|
|
219
|
+
bool,
|
|
220
|
+
typer.Option("--force", "-f", help="Overwrite existing llmstxt section"),
|
|
221
|
+
] = False,
|
|
222
|
+
quiet: Annotated[
|
|
223
|
+
bool,
|
|
224
|
+
typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
|
|
225
|
+
] = False,
|
|
226
|
+
verbose: Annotated[
|
|
227
|
+
bool,
|
|
228
|
+
typer.Option("--verbose", "-v", help="Show detailed progress"),
|
|
229
|
+
] = False,
|
|
230
|
+
) -> None:
|
|
231
|
+
"""Add llmstxt plugin config to mkdocs.yml."""
|
|
232
|
+
log, log_verbose = _make_logger(quiet, verbose)
|
|
233
|
+
|
|
234
|
+
if not config.exists():
|
|
235
|
+
log(f"Error: Config file not found: {config}", color="red", err=True)
|
|
236
|
+
log(
|
|
237
|
+
"Create one first or specify path with --config.",
|
|
238
|
+
color="yellow",
|
|
239
|
+
err=True,
|
|
240
|
+
)
|
|
241
|
+
raise typer.Exit(1)
|
|
242
|
+
|
|
243
|
+
yaml_parser = YAML()
|
|
244
|
+
yaml_parser.preserve_quotes = True
|
|
245
|
+
|
|
246
|
+
try:
|
|
247
|
+
with open(config, encoding="utf-8") as f:
|
|
248
|
+
data = yaml_parser.load(f)
|
|
249
|
+
except RuamelYAMLError as e:
|
|
250
|
+
log(f"Error: Invalid YAML: {e}", color="red", err=True)
|
|
251
|
+
raise typer.Exit(1) from None
|
|
252
|
+
|
|
253
|
+
if data is None:
|
|
254
|
+
data = {}
|
|
255
|
+
|
|
256
|
+
# Check for existing llmstxt plugin
|
|
257
|
+
plugins = data.get("plugins", [])
|
|
258
|
+
if plugins is None:
|
|
259
|
+
plugins = []
|
|
260
|
+
if not isinstance(plugins, (list, dict)):
|
|
261
|
+
log(
|
|
262
|
+
"Error: 'plugins' must be a list or mapping in mkdocs.yml.",
|
|
263
|
+
color="red",
|
|
264
|
+
err=True,
|
|
265
|
+
)
|
|
266
|
+
raise typer.Exit(1)
|
|
267
|
+
data["plugins"] = plugins
|
|
268
|
+
|
|
269
|
+
if isinstance(plugins, list):
|
|
270
|
+
has_llmstxt = any(
|
|
271
|
+
p == "llmstxt" or (isinstance(p, dict) and "llmstxt" in p) for p in plugins
|
|
272
|
+
)
|
|
273
|
+
elif isinstance(plugins, dict):
|
|
274
|
+
has_llmstxt = "llmstxt" in plugins
|
|
275
|
+
else:
|
|
276
|
+
has_llmstxt = False
|
|
277
|
+
|
|
278
|
+
if has_llmstxt and not force:
|
|
279
|
+
log("Error: llmstxt plugin already configured.", color="red", err=True)
|
|
280
|
+
log(
|
|
281
|
+
"Use --force to overwrite existing configuration.",
|
|
282
|
+
color="yellow",
|
|
283
|
+
err=True,
|
|
284
|
+
)
|
|
285
|
+
raise typer.Exit(1)
|
|
286
|
+
|
|
287
|
+
# Remove existing llmstxt if force is set
|
|
288
|
+
if has_llmstxt and force:
|
|
289
|
+
if isinstance(plugins, list):
|
|
290
|
+
plugins = [
|
|
291
|
+
p
|
|
292
|
+
for p in plugins
|
|
293
|
+
if p != "llmstxt" and not (isinstance(p, dict) and "llmstxt" in p)
|
|
294
|
+
]
|
|
295
|
+
data["plugins"] = plugins
|
|
296
|
+
elif isinstance(plugins, dict):
|
|
297
|
+
del plugins["llmstxt"]
|
|
298
|
+
|
|
299
|
+
# Create the llmstxt plugin entry with commented example
|
|
300
|
+
llmstxt_entry = {
|
|
301
|
+
"llmstxt": {
|
|
302
|
+
# We'll add comments after writing
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if isinstance(data["plugins"], list):
|
|
307
|
+
data["plugins"].append(llmstxt_entry)
|
|
308
|
+
else:
|
|
309
|
+
# Preserve dict-style plugins
|
|
310
|
+
data["plugins"]["llmstxt"] = {}
|
|
311
|
+
|
|
312
|
+
# Write the file
|
|
313
|
+
try:
|
|
314
|
+
with open(config, "w", encoding="utf-8") as f:
|
|
315
|
+
yaml_parser.dump(data, f)
|
|
316
|
+
except PermissionError:
|
|
317
|
+
log(f"Error: Permission denied writing to {config}", color="red", err=True)
|
|
318
|
+
raise typer.Exit(1) from None
|
|
319
|
+
|
|
320
|
+
# Now add comments using string manipulation since ruamel.yaml comment API is complex
|
|
321
|
+
content = config.read_text(encoding="utf-8")
|
|
322
|
+
ends_with_newline = content.endswith("\n")
|
|
323
|
+
|
|
324
|
+
# Find the llmstxt entry and add commented example below it
|
|
325
|
+
commented_example_lines = [
|
|
326
|
+
"# markdown_description: |",
|
|
327
|
+
"# Additional context for LLMs.",
|
|
328
|
+
"# sections:",
|
|
329
|
+
"# Getting Started:",
|
|
330
|
+
"# - index.md",
|
|
331
|
+
]
|
|
332
|
+
|
|
333
|
+
def _comment_indent(line: str) -> int:
|
|
334
|
+
leading = len(line) - len(line.lstrip(" "))
|
|
335
|
+
if line.lstrip().startswith("- "):
|
|
336
|
+
return leading + 4
|
|
337
|
+
return leading + 2
|
|
338
|
+
|
|
339
|
+
def _format_commented_example(indent: int) -> list[str]:
|
|
340
|
+
prefix = " " * indent
|
|
341
|
+
return [f"{prefix}{line}" for line in commented_example_lines]
|
|
342
|
+
|
|
343
|
+
# Look for the llmstxt entry and add commented example below it
|
|
344
|
+
lines = content.splitlines()
|
|
345
|
+
new_lines: list[str] = []
|
|
346
|
+
inserted = False
|
|
347
|
+
for line in lines:
|
|
348
|
+
stripped = line.strip()
|
|
349
|
+
if not inserted and stripped == "llmstxt: {}":
|
|
350
|
+
indent = _comment_indent(line)
|
|
351
|
+
new_lines.append(line.replace("llmstxt: {}", "llmstxt:"))
|
|
352
|
+
new_lines.extend(_format_commented_example(indent))
|
|
353
|
+
inserted = True
|
|
354
|
+
continue
|
|
355
|
+
if not inserted and stripped == "llmstxt:":
|
|
356
|
+
indent = _comment_indent(line)
|
|
357
|
+
new_lines.append(line)
|
|
358
|
+
new_lines.extend(_format_commented_example(indent))
|
|
359
|
+
inserted = True
|
|
360
|
+
continue
|
|
361
|
+
new_lines.append(line)
|
|
362
|
+
content = "\n".join(new_lines)
|
|
363
|
+
if ends_with_newline:
|
|
364
|
+
content += "\n"
|
|
365
|
+
|
|
366
|
+
try:
|
|
367
|
+
config.write_text(content, encoding="utf-8")
|
|
368
|
+
except PermissionError:
|
|
369
|
+
log(f"Error: Permission denied writing to {config}", color="red", err=True)
|
|
370
|
+
raise typer.Exit(1) from None
|
|
371
|
+
|
|
372
|
+
log(f"Added llmstxt plugin to {config}")
|
|
373
|
+
log_verbose(
|
|
374
|
+
"Configuration includes commented example for sections and markdown_description"
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@app.command()
|
|
379
|
+
def validate(
|
|
380
|
+
config: Annotated[
|
|
381
|
+
Path,
|
|
382
|
+
typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
|
|
383
|
+
] = Path("mkdocs.yml"),
|
|
384
|
+
quiet: Annotated[
|
|
385
|
+
bool,
|
|
386
|
+
typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
|
|
387
|
+
] = False,
|
|
388
|
+
verbose: Annotated[
|
|
389
|
+
bool,
|
|
390
|
+
typer.Option("--verbose", "-v", help="Show detailed config information"),
|
|
391
|
+
] = False,
|
|
392
|
+
) -> None:
|
|
393
|
+
"""Check config file validity."""
|
|
394
|
+
log, log_verbose = _make_logger(quiet, verbose)
|
|
395
|
+
|
|
396
|
+
try:
|
|
397
|
+
cfg = load_config(config)
|
|
398
|
+
except FileNotFoundError:
|
|
399
|
+
log(f"Config invalid: {config}", color="red", err=True)
|
|
400
|
+
log(f" Error: File not found: {config}", color="red", err=True)
|
|
401
|
+
raise typer.Exit(1) from None
|
|
402
|
+
except (ValueError, yaml.YAMLError) as e:
|
|
403
|
+
log(f"Config invalid: {config}", color="red", err=True)
|
|
404
|
+
log(f" Error: {e}", color="red", err=True)
|
|
405
|
+
raise typer.Exit(1) from None
|
|
406
|
+
|
|
407
|
+
total_pages = sum(len(pages) for pages in cfg.sections.values())
|
|
408
|
+
|
|
409
|
+
log(f"Config valid: {config}")
|
|
410
|
+
log(f" Site: {cfg.site_name}")
|
|
411
|
+
log(f" Sections: {len(cfg.sections)}")
|
|
412
|
+
log(f" Pages: {total_pages}")
|
|
413
|
+
|
|
414
|
+
# Verbose: show section details
|
|
415
|
+
for section_name, pages in cfg.sections.items():
|
|
416
|
+
log_verbose(f" {section_name}: {len(pages)} pages")
|
|
417
|
+
for page in pages:
|
|
418
|
+
log_verbose(f" - {page}")
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
if __name__ == "__main__":
|
|
422
|
+
app()
|
{llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/derive.py
RENAMED
|
@@ -10,10 +10,13 @@ def nav_to_sections(nav: list[Any]) -> dict[str, list[str]]:
|
|
|
10
10
|
sections: dict[str, list[str]] = {}
|
|
11
11
|
|
|
12
12
|
for item in nav:
|
|
13
|
-
if isinstance(item, dict):
|
|
13
|
+
if isinstance(item, str):
|
|
14
|
+
# Bare top-level page: - index.md
|
|
15
|
+
sections.setdefault("Pages", []).append(item)
|
|
16
|
+
elif isinstance(item, dict):
|
|
14
17
|
for key, value in item.items():
|
|
15
18
|
if isinstance(value, str):
|
|
16
|
-
# Top-level page
|
|
19
|
+
# Top-level page with title: - Home: index.md
|
|
17
20
|
sections.setdefault("Pages", []).append(value)
|
|
18
21
|
elif isinstance(value, list):
|
|
19
22
|
# Section with children
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""Configuration loading from mkdocs.yml."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
from pydantic import BaseModel, Field, ValidationError, field_validator
|
|
10
|
+
|
|
11
|
+
from llmstxt_standalone.config.derive import nav_to_sections
|
|
12
|
+
from llmstxt_standalone.config.model import Config
|
|
13
|
+
from llmstxt_standalone.config.plugin import get_llmstxt_config
|
|
14
|
+
|
|
15
|
+
DEFAULT_SITE_NAME = "Documentation"
|
|
16
|
+
DEFAULT_FULL_OUTPUT = "llms-full.txt"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _PermissiveLoader(yaml.SafeLoader):
|
|
20
|
+
"""SafeLoader that ignores unknown Python tags.
|
|
21
|
+
|
|
22
|
+
MkDocs extensions like pymdownx.slugs use Python-specific YAML tags
|
|
23
|
+
like !python/object/apply which SafeLoader rejects. This loader
|
|
24
|
+
treats them as raw strings to allow parsing the rest of the config.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _ignore_unknown(loader: yaml.Loader, tag_suffix: str, node: yaml.Node) -> str:
|
|
29
|
+
"""Return the raw tag as a placeholder string."""
|
|
30
|
+
return f"<{node.tag}>"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Register handler for all Python tags (both full and shorthand forms)
|
|
34
|
+
_PermissiveLoader.add_multi_constructor("tag:yaml.org,2002:python/", _ignore_unknown)
|
|
35
|
+
_PermissiveLoader.add_multi_constructor("!python/", _ignore_unknown)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class LlmstxtPluginConfig(BaseModel):
|
|
39
|
+
"""Pydantic model for llmstxt plugin configuration."""
|
|
40
|
+
|
|
41
|
+
markdown_description: str = ""
|
|
42
|
+
full_output: str = DEFAULT_FULL_OUTPUT
|
|
43
|
+
content_selector: str | None = None
|
|
44
|
+
sections: dict[str, list[str]] = Field(default_factory=dict)
|
|
45
|
+
|
|
46
|
+
@field_validator("sections", mode="before")
|
|
47
|
+
@classmethod
|
|
48
|
+
def validate_sections(cls, v: Any) -> dict[str, list[str]]:
|
|
49
|
+
"""Validate sections is a dict with string keys and list[str] values."""
|
|
50
|
+
if v is None:
|
|
51
|
+
return {}
|
|
52
|
+
if not isinstance(v, dict):
|
|
53
|
+
raise ValueError(f"'sections' must be a mapping, got {type(v).__name__}")
|
|
54
|
+
for section_name, pages in v.items():
|
|
55
|
+
if not isinstance(section_name, str):
|
|
56
|
+
raise ValueError(
|
|
57
|
+
f"'sections' keys must be strings, got {type(section_name).__name__}"
|
|
58
|
+
)
|
|
59
|
+
if not isinstance(pages, list):
|
|
60
|
+
raise ValueError(
|
|
61
|
+
f"'sections.{section_name}' must be a list of strings, "
|
|
62
|
+
f"got {type(pages).__name__}"
|
|
63
|
+
)
|
|
64
|
+
for page in pages:
|
|
65
|
+
if not isinstance(page, str):
|
|
66
|
+
raise ValueError(
|
|
67
|
+
f"'sections.{section_name}' entries must be strings, "
|
|
68
|
+
f"got {type(page).__name__}"
|
|
69
|
+
)
|
|
70
|
+
return v
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class MkDocsConfig(BaseModel):
|
|
74
|
+
"""Pydantic model for mkdocs.yml top-level fields we care about."""
|
|
75
|
+
|
|
76
|
+
site_name: str = DEFAULT_SITE_NAME
|
|
77
|
+
site_description: str = ""
|
|
78
|
+
site_url: str = ""
|
|
79
|
+
nav: list[Any] = Field(default_factory=list)
|
|
80
|
+
use_directory_urls: bool = True
|
|
81
|
+
|
|
82
|
+
@field_validator("site_name", mode="before")
|
|
83
|
+
@classmethod
|
|
84
|
+
def coerce_site_name(cls, v: Any) -> str:
|
|
85
|
+
"""Coerce None to default."""
|
|
86
|
+
return v if v is not None else DEFAULT_SITE_NAME
|
|
87
|
+
|
|
88
|
+
@field_validator("site_description", "site_url", mode="before")
|
|
89
|
+
@classmethod
|
|
90
|
+
def coerce_str_fields(cls, v: Any) -> str:
|
|
91
|
+
"""Coerce None to empty string."""
|
|
92
|
+
return v if v is not None else ""
|
|
93
|
+
|
|
94
|
+
@field_validator("nav", mode="before")
|
|
95
|
+
@classmethod
|
|
96
|
+
def coerce_nav(cls, v: Any) -> list[Any]:
|
|
97
|
+
"""Coerce None to empty list."""
|
|
98
|
+
return v if v is not None else []
|
|
99
|
+
|
|
100
|
+
@field_validator("site_url", mode="after")
|
|
101
|
+
@classmethod
|
|
102
|
+
def strip_trailing_slash(cls, v: str) -> str:
|
|
103
|
+
"""Remove trailing slash from site_url."""
|
|
104
|
+
return v.rstrip("/")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def load_config(config_path: Path) -> Config:
|
|
108
|
+
"""Load and resolve configuration from mkdocs.yml.
|
|
109
|
+
|
|
110
|
+
Args:
|
|
111
|
+
config_path: Path to mkdocs.yml file.
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
Resolved Config object.
|
|
115
|
+
|
|
116
|
+
Raises:
|
|
117
|
+
FileNotFoundError: If config file doesn't exist.
|
|
118
|
+
ValueError: If config is invalid.
|
|
119
|
+
"""
|
|
120
|
+
if not config_path.exists():
|
|
121
|
+
raise FileNotFoundError(f"Config file not found: {config_path}")
|
|
122
|
+
|
|
123
|
+
try:
|
|
124
|
+
with open(config_path, encoding="utf-8") as f:
|
|
125
|
+
raw = yaml.load(f, Loader=_PermissiveLoader)
|
|
126
|
+
except RecursionError:
|
|
127
|
+
raise ValueError(
|
|
128
|
+
f"Config file has nav structure too deeply nested: {config_path}"
|
|
129
|
+
) from None
|
|
130
|
+
|
|
131
|
+
if not isinstance(raw, dict):
|
|
132
|
+
raise ValueError(f"Config file must be a mapping: {config_path}")
|
|
133
|
+
|
|
134
|
+
return _config_from_mkdocs(raw)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _config_from_mkdocs(raw: dict[str, Any]) -> Config:
|
|
138
|
+
"""Build a Config from a parsed mkdocs.yml mapping."""
|
|
139
|
+
try:
|
|
140
|
+
mkdocs = MkDocsConfig.model_validate(raw)
|
|
141
|
+
except ValidationError as e:
|
|
142
|
+
raise ValueError(str(e)) from None
|
|
143
|
+
|
|
144
|
+
llmstxt_raw = get_llmstxt_config(raw)
|
|
145
|
+
|
|
146
|
+
if llmstxt_raw is not None:
|
|
147
|
+
try:
|
|
148
|
+
plugin = LlmstxtPluginConfig.model_validate(llmstxt_raw)
|
|
149
|
+
except ValidationError as e:
|
|
150
|
+
# Extract the core error message for cleaner output
|
|
151
|
+
raise ValueError(f"llmstxt {e.errors()[0]['msg']}") from None
|
|
152
|
+
sections = plugin.sections
|
|
153
|
+
markdown_description = plugin.markdown_description
|
|
154
|
+
full_output = plugin.full_output
|
|
155
|
+
content_selector = plugin.content_selector
|
|
156
|
+
else:
|
|
157
|
+
sections = nav_to_sections(mkdocs.nav)
|
|
158
|
+
markdown_description = ""
|
|
159
|
+
full_output = DEFAULT_FULL_OUTPUT
|
|
160
|
+
content_selector = None
|
|
161
|
+
|
|
162
|
+
return Config(
|
|
163
|
+
site_name=mkdocs.site_name,
|
|
164
|
+
site_description=mkdocs.site_description,
|
|
165
|
+
site_url=mkdocs.site_url,
|
|
166
|
+
markdown_description=markdown_description,
|
|
167
|
+
full_output=full_output,
|
|
168
|
+
content_selector=content_selector,
|
|
169
|
+
sections=sections,
|
|
170
|
+
nav=mkdocs.nav,
|
|
171
|
+
use_directory_urls=mkdocs.use_directory_urls,
|
|
172
|
+
)
|
{llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/model.py
RENAMED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from dataclasses import dataclass
|
|
6
5
|
from typing import Any
|
|
7
6
|
|
|
7
|
+
from pydantic import BaseModel
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
class Config:
|
|
9
|
+
|
|
10
|
+
class Config(BaseModel):
|
|
11
11
|
"""Resolved configuration for llmstxt generation."""
|
|
12
12
|
|
|
13
13
|
site_name: str
|
|
@@ -6,6 +6,11 @@ import mdformat
|
|
|
6
6
|
from bs4 import BeautifulSoup, NavigableString, Tag
|
|
7
7
|
from markdownify import ATX, MarkdownConverter
|
|
8
8
|
|
|
9
|
+
__all__ = [
|
|
10
|
+
"extract_title_from_html",
|
|
11
|
+
"html_to_markdown",
|
|
12
|
+
]
|
|
13
|
+
|
|
9
14
|
|
|
10
15
|
def _should_remove(tag: Tag) -> bool:
|
|
11
16
|
"""Check if a tag should be removed during autoclean."""
|
|
@@ -34,9 +39,15 @@ def _autoclean(soup: BeautifulSoup | Tag) -> None:
|
|
|
34
39
|
for element in soup.find_all("table", attrs={"class": "highlighttable"}):
|
|
35
40
|
code = element.find("code")
|
|
36
41
|
if code:
|
|
37
|
-
|
|
38
|
-
|
|
42
|
+
# Find the root BeautifulSoup document to create new tags
|
|
43
|
+
# (soup parameter may be a Tag, which doesn't have new_tag)
|
|
44
|
+
doc = next(
|
|
45
|
+
(p for p in element.parents if isinstance(p, BeautifulSoup)), None
|
|
39
46
|
)
|
|
47
|
+
if doc:
|
|
48
|
+
pre_tag = doc.new_tag("pre")
|
|
49
|
+
pre_tag.string = code.get_text()
|
|
50
|
+
element.replace_with(pre_tag)
|
|
40
51
|
|
|
41
52
|
|
|
42
53
|
def _get_language(tag: Tag) -> str:
|
|
@@ -64,13 +75,14 @@ def _get_language(tag: Tag) -> str:
|
|
|
64
75
|
return ""
|
|
65
76
|
|
|
66
77
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
78
|
+
def _make_converter() -> MarkdownConverter:
|
|
79
|
+
"""Create a MarkdownConverter with mkdocs-llmstxt-compatible settings."""
|
|
80
|
+
return MarkdownConverter(
|
|
81
|
+
bullets="-",
|
|
82
|
+
code_language_callback=_get_language,
|
|
83
|
+
escape_underscores=False,
|
|
84
|
+
heading_style=ATX,
|
|
85
|
+
)
|
|
74
86
|
|
|
75
87
|
|
|
76
88
|
def extract_title_from_html(html: str, site_name: str | None = None) -> str | None:
|
|
@@ -148,5 +160,6 @@ def html_to_markdown(html: str, content_selector: str | None = None) -> str:
|
|
|
148
160
|
return ""
|
|
149
161
|
|
|
150
162
|
_autoclean(content)
|
|
151
|
-
|
|
163
|
+
converter = _make_converter()
|
|
164
|
+
md = converter.convert_soup(content)
|
|
152
165
|
return mdformat.text(md, options={"wrap": "no"}, extensions=("tables",))
|
|
@@ -8,6 +8,19 @@ from pathlib import Path
|
|
|
8
8
|
from llmstxt_standalone.config import Config
|
|
9
9
|
from llmstxt_standalone.convert import extract_title_from_html, html_to_markdown
|
|
10
10
|
|
|
11
|
+
__all__ = [
|
|
12
|
+
"BuildResult",
|
|
13
|
+
"GenerateResult",
|
|
14
|
+
"PageMarkdown",
|
|
15
|
+
"build_llms_output",
|
|
16
|
+
"ensure_safe_md_path",
|
|
17
|
+
"generate_llms_txt",
|
|
18
|
+
"md_path_to_html_path",
|
|
19
|
+
"md_path_to_output_md_path",
|
|
20
|
+
"md_path_to_page_url",
|
|
21
|
+
"write_markdown_files",
|
|
22
|
+
]
|
|
23
|
+
|
|
11
24
|
|
|
12
25
|
def _escape_markdown_link_text(text: str) -> str:
|
|
13
26
|
r"""Escape characters that break markdown link syntax.
|
|
@@ -32,7 +45,18 @@ def _is_index_md(md_path: str) -> bool:
|
|
|
32
45
|
return md_path == "index.md" or md_path.endswith("/index.md")
|
|
33
46
|
|
|
34
47
|
|
|
35
|
-
def
|
|
48
|
+
def ensure_safe_md_path(md_path: str) -> Path:
|
|
49
|
+
"""Validate and convert a markdown path to a safe Path object.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
md_path: Relative markdown file path (e.g., "install.md").
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
Path object for the markdown file.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
ValueError: If path is absolute or contains '..'.
|
|
59
|
+
"""
|
|
36
60
|
path = Path(md_path)
|
|
37
61
|
if path.is_absolute() or path.drive:
|
|
38
62
|
raise ValueError(f"Markdown path must be relative: {md_path}")
|
|
@@ -63,7 +87,7 @@ def md_path_to_html_path(
|
|
|
63
87
|
Path to the corresponding HTML file.
|
|
64
88
|
"""
|
|
65
89
|
# Handle index.md at any level (root or nested like foo/bar/index.md)
|
|
66
|
-
safe_md_path =
|
|
90
|
+
safe_md_path = ensure_safe_md_path(md_path)
|
|
67
91
|
if _is_index_md(md_path):
|
|
68
92
|
html_path = site_dir / safe_md_path.with_suffix(".html")
|
|
69
93
|
return _ensure_within_dir(site_dir, html_path, "HTML path")
|
|
@@ -117,7 +141,7 @@ def md_path_to_output_md_path(
|
|
|
117
141
|
Path where the markdown file should be written.
|
|
118
142
|
"""
|
|
119
143
|
# Handle index.md at any level (root or nested like foo/bar/index.md)
|
|
120
|
-
safe_md_path =
|
|
144
|
+
safe_md_path = ensure_safe_md_path(md_path)
|
|
121
145
|
if _is_index_md(md_path):
|
|
122
146
|
output_path = site_dir / safe_md_path
|
|
123
147
|
return _ensure_within_dir(site_dir, output_path, "Output path")
|
|
@@ -149,7 +173,12 @@ class BuildResult:
|
|
|
149
173
|
|
|
150
174
|
@dataclass
|
|
151
175
|
class GenerateResult:
|
|
152
|
-
"""Result of llms.txt generation with files written.
|
|
176
|
+
"""Result of llms.txt generation with files written.
|
|
177
|
+
|
|
178
|
+
Used by generate_llms_txt() for programmatic use cases that want
|
|
179
|
+
file writing handled automatically. The CLI uses BuildResult +
|
|
180
|
+
write_markdown_files() for more control over the write step.
|
|
181
|
+
"""
|
|
153
182
|
|
|
154
183
|
llms_txt: str
|
|
155
184
|
llms_full_txt: str
|
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
"""Command-line interface."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Annotated
|
|
7
|
-
|
|
8
|
-
import typer
|
|
9
|
-
|
|
10
|
-
from llmstxt_standalone import __version__
|
|
11
|
-
from llmstxt_standalone.config import load_config
|
|
12
|
-
from llmstxt_standalone.generate import build_llms_output, write_markdown_files
|
|
13
|
-
|
|
14
|
-
app = typer.Typer(
|
|
15
|
-
help="Generate llms.txt from built HTML documentation.",
|
|
16
|
-
no_args_is_help=False,
|
|
17
|
-
context_settings={"help_option_names": ["-h", "--help"]},
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def version_callback(value: bool) -> None:
|
|
22
|
-
"""Print version and exit if --version flag is set."""
|
|
23
|
-
if value:
|
|
24
|
-
typer.echo(f"llmstxt-standalone {__version__}")
|
|
25
|
-
raise typer.Exit()
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@app.command()
|
|
29
|
-
def main(
|
|
30
|
-
config: Annotated[
|
|
31
|
-
Path,
|
|
32
|
-
typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
|
|
33
|
-
] = Path("mkdocs.yml"),
|
|
34
|
-
site_dir: Annotated[
|
|
35
|
-
Path,
|
|
36
|
-
typer.Option("--site-dir", "-s", help="Path to built HTML site directory"),
|
|
37
|
-
] = Path("site"),
|
|
38
|
-
output_dir: Annotated[
|
|
39
|
-
Path | None,
|
|
40
|
-
typer.Option(
|
|
41
|
-
"--output-dir", "-o", help="Output directory (defaults to site-dir)"
|
|
42
|
-
),
|
|
43
|
-
] = None,
|
|
44
|
-
dry_run: Annotated[
|
|
45
|
-
bool,
|
|
46
|
-
typer.Option(
|
|
47
|
-
"--dry-run",
|
|
48
|
-
"-n",
|
|
49
|
-
help="Preview what would be generated without writing files",
|
|
50
|
-
),
|
|
51
|
-
] = False,
|
|
52
|
-
quiet: Annotated[
|
|
53
|
-
bool,
|
|
54
|
-
typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
|
|
55
|
-
] = False,
|
|
56
|
-
verbose: Annotated[
|
|
57
|
-
bool,
|
|
58
|
-
typer.Option("--verbose", "-v", help="Show detailed progress"),
|
|
59
|
-
] = False,
|
|
60
|
-
version: Annotated[
|
|
61
|
-
bool,
|
|
62
|
-
typer.Option(
|
|
63
|
-
"--version",
|
|
64
|
-
"-V",
|
|
65
|
-
callback=version_callback,
|
|
66
|
-
is_eager=True,
|
|
67
|
-
help="Show version",
|
|
68
|
-
),
|
|
69
|
-
] = False,
|
|
70
|
-
) -> None:
|
|
71
|
-
"""Generate llms.txt and llms-full.txt from built HTML documentation."""
|
|
72
|
-
# Resolve output directory
|
|
73
|
-
out_dir = output_dir or site_dir
|
|
74
|
-
|
|
75
|
-
# quiet overrides verbose
|
|
76
|
-
if quiet:
|
|
77
|
-
verbose = False
|
|
78
|
-
|
|
79
|
-
def log(msg: str, color: str = "green", err: bool = False) -> None:
|
|
80
|
-
if not quiet:
|
|
81
|
-
typer.secho(msg, fg=color, err=err)
|
|
82
|
-
|
|
83
|
-
# Validate inputs
|
|
84
|
-
if not config.exists():
|
|
85
|
-
typer.secho(f"Error: Config file not found: {config}", fg="red", err=True)
|
|
86
|
-
raise typer.Exit(1)
|
|
87
|
-
|
|
88
|
-
if not site_dir.exists():
|
|
89
|
-
typer.secho(f"Error: Site directory not found: {site_dir}", fg="red", err=True)
|
|
90
|
-
typer.secho(
|
|
91
|
-
"Hint: Run 'mkdocs build' first to generate the HTML documentation.",
|
|
92
|
-
fg="yellow",
|
|
93
|
-
err=True,
|
|
94
|
-
)
|
|
95
|
-
raise typer.Exit(1)
|
|
96
|
-
|
|
97
|
-
# Load config
|
|
98
|
-
try:
|
|
99
|
-
cfg = load_config(config)
|
|
100
|
-
except Exception as e:
|
|
101
|
-
typer.secho(f"Error loading config: {e}", fg="red", err=True)
|
|
102
|
-
raise typer.Exit(1) from None
|
|
103
|
-
|
|
104
|
-
# Validate sections
|
|
105
|
-
if not cfg.sections:
|
|
106
|
-
typer.secho("Error: No sections configured.", fg="red", err=True)
|
|
107
|
-
typer.secho(
|
|
108
|
-
"Add a 'nav' to your mkdocs.yml, or configure 'sections' "
|
|
109
|
-
"in the llmstxt plugin.",
|
|
110
|
-
fg="yellow",
|
|
111
|
-
err=True,
|
|
112
|
-
)
|
|
113
|
-
raise typer.Exit(1)
|
|
114
|
-
|
|
115
|
-
if verbose:
|
|
116
|
-
typer.echo(f"Site: {cfg.site_name}")
|
|
117
|
-
typer.echo(f"Sections: {list(cfg.sections.keys())}")
|
|
118
|
-
if dry_run:
|
|
119
|
-
typer.echo("Dry run - no files will be written")
|
|
120
|
-
|
|
121
|
-
# Generate content
|
|
122
|
-
build = build_llms_output(
|
|
123
|
-
config=cfg,
|
|
124
|
-
site_dir=site_dir,
|
|
125
|
-
)
|
|
126
|
-
try:
|
|
127
|
-
markdown_files = write_markdown_files(
|
|
128
|
-
build.pages,
|
|
129
|
-
output_dir=out_dir,
|
|
130
|
-
use_directory_urls=cfg.use_directory_urls,
|
|
131
|
-
dry_run=dry_run,
|
|
132
|
-
)
|
|
133
|
-
except (OSError, ValueError) as exc:
|
|
134
|
-
typer.secho(f"Error writing markdown files: {exc}", fg="red", err=True)
|
|
135
|
-
raise typer.Exit(1) from None
|
|
136
|
-
|
|
137
|
-
# Define output paths
|
|
138
|
-
llms_path = out_dir / "llms.txt"
|
|
139
|
-
full_path = out_dir / cfg.full_output
|
|
140
|
-
|
|
141
|
-
# Write output files (skip in dry-run mode)
|
|
142
|
-
if dry_run:
|
|
143
|
-
action = "Would generate"
|
|
144
|
-
color = "yellow"
|
|
145
|
-
else:
|
|
146
|
-
action = "Generated"
|
|
147
|
-
color = "green"
|
|
148
|
-
try:
|
|
149
|
-
out_dir.mkdir(parents=True, exist_ok=True)
|
|
150
|
-
llms_path.write_text(build.llms_txt, encoding="utf-8")
|
|
151
|
-
full_path.write_text(build.llms_full_txt, encoding="utf-8")
|
|
152
|
-
except OSError as exc:
|
|
153
|
-
typer.secho(f"Error writing output files: {exc}", fg="red", err=True)
|
|
154
|
-
raise typer.Exit(1) from None
|
|
155
|
-
|
|
156
|
-
log(f"{action} {llms_path} ({len(build.llms_txt):,} bytes)", color)
|
|
157
|
-
log(f"{action} {full_path} ({len(build.llms_full_txt):,} bytes)", color)
|
|
158
|
-
log(f"{action} {len(markdown_files)} markdown files", color)
|
|
159
|
-
|
|
160
|
-
if verbose and build.skipped:
|
|
161
|
-
log("Skipped files:", color="yellow", err=True)
|
|
162
|
-
for path, reason in build.skipped:
|
|
163
|
-
log(f"- {path} ({reason})", color="yellow", err=True)
|
|
164
|
-
|
|
165
|
-
if build.warnings:
|
|
166
|
-
log("Warnings:", color="yellow", err=True)
|
|
167
|
-
for warning in build.warnings:
|
|
168
|
-
log(f"- {warning}", color="yellow", err=True)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
if __name__ == "__main__":
|
|
172
|
-
app()
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
"""Configuration loading from mkdocs.yml."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
import yaml
|
|
9
|
-
|
|
10
|
-
from llmstxt_standalone.config.derive import nav_to_sections
|
|
11
|
-
from llmstxt_standalone.config.model import Config
|
|
12
|
-
from llmstxt_standalone.config.plugin import get_llmstxt_config
|
|
13
|
-
|
|
14
|
-
DEFAULT_SITE_NAME = "Documentation"
|
|
15
|
-
DEFAULT_FULL_OUTPUT = "llms-full.txt"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class _PermissiveLoader(yaml.SafeLoader):
|
|
19
|
-
"""SafeLoader that ignores unknown Python tags.
|
|
20
|
-
|
|
21
|
-
MkDocs extensions like pymdownx.slugs use Python-specific YAML tags
|
|
22
|
-
like !python/object/apply which SafeLoader rejects. This loader
|
|
23
|
-
treats them as raw strings to allow parsing the rest of the config.
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _ignore_unknown(loader: yaml.Loader, tag_suffix: str, node: yaml.Node) -> str:
|
|
28
|
-
"""Return the raw tag as a placeholder string."""
|
|
29
|
-
return f"<{node.tag}>"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
# Register handler for all Python tags (both full and shorthand forms)
|
|
33
|
-
_PermissiveLoader.add_multi_constructor("tag:yaml.org,2002:python/", _ignore_unknown)
|
|
34
|
-
_PermissiveLoader.add_multi_constructor("!python/", _ignore_unknown)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def load_config(config_path: Path) -> Config:
|
|
38
|
-
"""Load and resolve configuration from mkdocs.yml.
|
|
39
|
-
|
|
40
|
-
Args:
|
|
41
|
-
config_path: Path to mkdocs.yml file.
|
|
42
|
-
|
|
43
|
-
Returns:
|
|
44
|
-
Resolved Config object.
|
|
45
|
-
|
|
46
|
-
Raises:
|
|
47
|
-
FileNotFoundError: If config file doesn't exist.
|
|
48
|
-
"""
|
|
49
|
-
if not config_path.exists():
|
|
50
|
-
raise FileNotFoundError(f"Config file not found: {config_path}")
|
|
51
|
-
|
|
52
|
-
with open(config_path, encoding="utf-8") as f:
|
|
53
|
-
raw = yaml.load(f, Loader=_PermissiveLoader)
|
|
54
|
-
|
|
55
|
-
if not isinstance(raw, dict):
|
|
56
|
-
raise ValueError(f"Config file must be a mapping: {config_path}")
|
|
57
|
-
|
|
58
|
-
return _config_from_mkdocs(raw)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def _config_from_mkdocs(raw: dict[str, Any]) -> Config:
|
|
62
|
-
"""Build a Config from a parsed mkdocs.yml mapping."""
|
|
63
|
-
site_name = raw.get("site_name", DEFAULT_SITE_NAME)
|
|
64
|
-
site_description = raw.get("site_description", "")
|
|
65
|
-
site_url = raw.get("site_url", "").rstrip("/")
|
|
66
|
-
nav = raw.get("nav", [])
|
|
67
|
-
# MkDocs defaults use_directory_urls to true
|
|
68
|
-
use_directory_urls = raw.get("use_directory_urls", True)
|
|
69
|
-
|
|
70
|
-
llmstxt_config = get_llmstxt_config(raw)
|
|
71
|
-
|
|
72
|
-
if llmstxt_config is not None:
|
|
73
|
-
markdown_description = llmstxt_config.get("markdown_description", "")
|
|
74
|
-
full_output = llmstxt_config.get("full_output", DEFAULT_FULL_OUTPUT)
|
|
75
|
-
content_selector = llmstxt_config.get("content_selector")
|
|
76
|
-
sections = llmstxt_config.get("sections", {})
|
|
77
|
-
else:
|
|
78
|
-
markdown_description = ""
|
|
79
|
-
full_output = DEFAULT_FULL_OUTPUT
|
|
80
|
-
content_selector = None
|
|
81
|
-
sections = nav_to_sections(nav)
|
|
82
|
-
|
|
83
|
-
return Config(
|
|
84
|
-
site_name=site_name,
|
|
85
|
-
site_description=site_description,
|
|
86
|
-
site_url=site_url,
|
|
87
|
-
markdown_description=markdown_description,
|
|
88
|
-
full_output=full_output,
|
|
89
|
-
content_selector=content_selector,
|
|
90
|
-
sections=sections,
|
|
91
|
-
nav=nav,
|
|
92
|
-
use_directory_urls=use_directory_urls,
|
|
93
|
-
)
|
|
File without changes
|
|
File without changes
|
{llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/__init__.py
RENAMED
|
File without changes
|
{llmstxt_standalone-0.1.0 → llmstxt_standalone-0.2.0}/src/llmstxt_standalone/config/plugin.py
RENAMED
|
File without changes
|
|
File without changes
|