llmstxt-standalone 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: llmstxt-standalone
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Generate llms.txt from built HTML documentation
5
5
  Keywords: llms,documentation,markdown,mkdocs
6
6
  Author: Shaan Majid
@@ -20,10 +20,12 @@ Classifier: Topic :: Documentation
20
20
  Classifier: Typing :: Typed
21
21
  Requires-Dist: typer>=0.9.0
22
22
  Requires-Dist: pyyaml>=6.0
23
+ Requires-Dist: ruamel-yaml>=0.18
23
24
  Requires-Dist: beautifulsoup4>=4.12
24
25
  Requires-Dist: markdownify>=0.14,<2.0
25
26
  Requires-Dist: mdformat>=0.7,<2.0
26
27
  Requires-Dist: mdformat-tables>=1.0
28
+ Requires-Dist: pydantic>=2.12.5
27
29
  Requires-Python: >=3.10
28
30
  Project-URL: Repository, https://github.com/shaanmajid/llmstxt-standalone
29
31
  Project-URL: Issues, https://github.com/shaanmajid/llmstxt-standalone/issues
@@ -59,21 +61,25 @@ uv add llmstxt-standalone # or: pip install
59
61
 
60
62
  ## Usage
61
63
 
64
+ ### build
65
+
66
+ Generate llms.txt from a built MkDocs site:
67
+
62
68
  ```bash
63
69
  # Run from project root (expects mkdocs.yml and site/)
64
- llmstxt-standalone
70
+ llmstxt-standalone build
65
71
 
66
72
  # Explicit paths
67
- llmstxt-standalone --config mkdocs.yml --site-dir ./build --output-dir ./dist
73
+ llmstxt-standalone build --config mkdocs.yml --site-dir ./build --output-dir ./dist
68
74
 
69
75
  # Preview without writing files
70
- llmstxt-standalone --dry-run
76
+ llmstxt-standalone build --dry-run
71
77
 
72
78
  # Suppress output
73
- llmstxt-standalone --quiet
79
+ llmstxt-standalone build --quiet
74
80
 
75
81
  # Show detailed progress
76
- llmstxt-standalone --verbose
82
+ llmstxt-standalone build --verbose
77
83
  ```
78
84
 
79
85
  | Option | Short | Default | Description |
@@ -84,11 +90,65 @@ llmstxt-standalone --verbose
84
90
  | `--dry-run` | `-n` | | Preview without writing |
85
91
  | `--quiet` | `-q` | | Suppress output |
86
92
  | `--verbose` | `-v` | | Show detailed progress |
87
- | `--version` | `-V` | | Show version |
93
+
94
+ ### init
95
+
96
+ Add llmstxt plugin configuration to an existing mkdocs.yml:
97
+
98
+ ```bash
99
+ llmstxt-standalone init
100
+
101
+ # Specify config path
102
+ llmstxt-standalone init --config path/to/mkdocs.yml
103
+
104
+ # Overwrite existing llmstxt config
105
+ llmstxt-standalone init --force
106
+
107
+ # Show detailed progress
108
+ llmstxt-standalone init --verbose
109
+ ```
110
+
111
+ | Option | Short | Description |
112
+ |--------|-------|-------------|
113
+ | `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
114
+ | `--force` | `-f` | Overwrite existing llmstxt section |
115
+ | `--quiet` | `-q` | Suppress output |
116
+ | `--verbose` | `-v` | Show detailed progress |
117
+
118
+ ### validate
119
+
120
+ Check that a config file is valid:
121
+
122
+ ```bash
123
+ $ llmstxt-standalone validate
124
+ Config valid: mkdocs.yml
125
+ Site: My Project
126
+ Sections: 3
127
+ Pages: 12
128
+
129
+ # Exit code only (for scripts)
130
+ llmstxt-standalone validate --quiet
131
+
132
+ # Show section details
133
+ llmstxt-standalone validate --verbose
134
+ ```
135
+
136
+ | Option | Short | Description |
137
+ |--------|-------|-------------|
138
+ | `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
139
+ | `--quiet` | `-q` | Suppress output |
140
+ | `--verbose` | `-v` | Show detailed config information |
141
+
142
+ ### Global options
143
+
144
+ ```bash
145
+ llmstxt-standalone --version # Show version
146
+ llmstxt-standalone --help # Show available commands
147
+ ```
88
148
 
89
149
  ## Output
90
150
 
91
- The tool generates three outputs:
151
+ The `build` command generates three outputs:
92
152
 
93
153
  1. `llms.txt` — an index file with markdown links to all pages
94
154
  1. `llms-full.txt` — concatenated content of all pages
@@ -28,21 +28,25 @@ uv add llmstxt-standalone # or: pip install
28
28
 
29
29
  ## Usage
30
30
 
31
+ ### build
32
+
33
+ Generate llms.txt from a built MkDocs site:
34
+
31
35
  ```bash
32
36
  # Run from project root (expects mkdocs.yml and site/)
33
- llmstxt-standalone
37
+ llmstxt-standalone build
34
38
 
35
39
  # Explicit paths
36
- llmstxt-standalone --config mkdocs.yml --site-dir ./build --output-dir ./dist
40
+ llmstxt-standalone build --config mkdocs.yml --site-dir ./build --output-dir ./dist
37
41
 
38
42
  # Preview without writing files
39
- llmstxt-standalone --dry-run
43
+ llmstxt-standalone build --dry-run
40
44
 
41
45
  # Suppress output
42
- llmstxt-standalone --quiet
46
+ llmstxt-standalone build --quiet
43
47
 
44
48
  # Show detailed progress
45
- llmstxt-standalone --verbose
49
+ llmstxt-standalone build --verbose
46
50
  ```
47
51
 
48
52
  | Option | Short | Default | Description |
@@ -53,11 +57,65 @@ llmstxt-standalone --verbose
53
57
  | `--dry-run` | `-n` | | Preview without writing |
54
58
  | `--quiet` | `-q` | | Suppress output |
55
59
  | `--verbose` | `-v` | | Show detailed progress |
56
- | `--version` | `-V` | | Show version |
60
+
61
+ ### init
62
+
63
+ Add llmstxt plugin configuration to an existing mkdocs.yml:
64
+
65
+ ```bash
66
+ llmstxt-standalone init
67
+
68
+ # Specify config path
69
+ llmstxt-standalone init --config path/to/mkdocs.yml
70
+
71
+ # Overwrite existing llmstxt config
72
+ llmstxt-standalone init --force
73
+
74
+ # Show detailed progress
75
+ llmstxt-standalone init --verbose
76
+ ```
77
+
78
+ | Option | Short | Description |
79
+ |--------|-------|-------------|
80
+ | `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
81
+ | `--force` | `-f` | Overwrite existing llmstxt section |
82
+ | `--quiet` | `-q` | Suppress output |
83
+ | `--verbose` | `-v` | Show detailed progress |
84
+
85
+ ### validate
86
+
87
+ Check that a config file is valid:
88
+
89
+ ```bash
90
+ $ llmstxt-standalone validate
91
+ Config valid: mkdocs.yml
92
+ Site: My Project
93
+ Sections: 3
94
+ Pages: 12
95
+
96
+ # Exit code only (for scripts)
97
+ llmstxt-standalone validate --quiet
98
+
99
+ # Show section details
100
+ llmstxt-standalone validate --verbose
101
+ ```
102
+
103
+ | Option | Short | Description |
104
+ |--------|-------|-------------|
105
+ | `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
106
+ | `--quiet` | `-q` | Suppress output |
107
+ | `--verbose` | `-v` | Show detailed config information |
108
+
109
+ ### Global options
110
+
111
+ ```bash
112
+ llmstxt-standalone --version # Show version
113
+ llmstxt-standalone --help # Show available commands
114
+ ```
57
115
 
58
116
  ## Output
59
117
 
60
- The tool generates three outputs:
118
+ The `build` command generates three outputs:
61
119
 
62
120
  1. `llms.txt` — an index file with markdown links to all pages
63
121
  1. `llms-full.txt` — concatenated content of all pages
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "llmstxt-standalone"
3
- version = "0.1.1"
3
+ version = "0.2.0"
4
4
  description = "Generate llms.txt from built HTML documentation"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -26,10 +26,12 @@ authors = [{ name = "Shaan Majid", email = "shaanmajid64@gmail.com" }]
26
26
  dependencies = [
27
27
  "typer>=0.9.0",
28
28
  "pyyaml>=6.0",
29
+ "ruamel.yaml>=0.18",
29
30
  "beautifulsoup4>=4.12",
30
31
  "markdownify>=0.14,<2.0",
31
32
  "mdformat>=0.7,<2.0",
32
33
  "mdformat-tables>=1.0",
34
+ "pydantic>=2.12.5",
33
35
  ]
34
36
 
35
37
  [project.scripts]
@@ -0,0 +1,422 @@
1
+ """Command-line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+ import yaml
11
+ from ruamel.yaml import YAML
12
+ from ruamel.yaml import YAMLError as RuamelYAMLError
13
+
14
+ from llmstxt_standalone import __version__
15
+ from llmstxt_standalone.config import load_config
16
+ from llmstxt_standalone.generate import (
17
+ build_llms_output,
18
+ ensure_safe_md_path,
19
+ write_markdown_files,
20
+ )
21
+
22
+
23
+ def _make_logger(
24
+ quiet: bool, verbose: bool = False
25
+ ) -> tuple[Callable[..., None], Callable[..., None]]:
26
+ """Create log and log_verbose functions for CLI output.
27
+
28
+ Args:
29
+ quiet: If True, suppress all output.
30
+ verbose: If True, enable verbose logging (quiet overrides this).
31
+
32
+ Returns:
33
+ Tuple of (log, log_verbose) functions.
34
+ """
35
+ effective_verbose = verbose and not quiet
36
+
37
+ def log(msg: str, color: str = "green", err: bool = False) -> None:
38
+ if not quiet:
39
+ typer.secho(msg, fg=color, err=err)
40
+
41
+ def log_verbose(msg: str, color: str = "green", err: bool = False) -> None:
42
+ if effective_verbose:
43
+ typer.secho(msg, fg=color, err=err)
44
+
45
+ return log, log_verbose
46
+
47
+
48
def version_callback(value: bool) -> None:
    """Handle the ``--version`` flag: print the package version and stop.

    Args:
        value: True when the flag was passed on the command line.

    Raises:
        typer.Exit: After printing the version, to end the program.
    """
    if not value:
        return
    typer.echo(f"llmstxt-standalone {__version__}")
    raise typer.Exit()
53
+
54
+
55
# Root Typer application that the sub-commands below register on.
# ``no_args_is_help=True`` makes a bare invocation print the help text, and
# ``-h`` is accepted as an alias for ``--help``.
app = typer.Typer(
    help="Generate llms.txt from built HTML documentation.",
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)
60
+
61
+
62
# Application-level callback. Its only job is to host the global
# --version / -V option; the real work is done by the sub-commands.
@app.callback(invoke_without_command=True)
def main(
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            "-V",
            # version_callback prints the version and raises typer.Exit.
            callback=version_callback,
            is_eager=True,
            help="Show version and exit",
        ),
    ] = False,
) -> None:
    """Generate llms.txt from built HTML documentation."""
    # Intentionally empty: --version is handled entirely by its callback.
76
+
77
+
78
@app.command()
def build(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
    ] = Path("mkdocs.yml"),
    site_dir: Annotated[
        Path,
        typer.Option("--site-dir", "-s", help="Path to built HTML site directory"),
    ] = Path("site"),
    output_dir: Annotated[
        Path | None,
        typer.Option(
            "--output-dir", "-o", help="Output directory (defaults to site-dir)"
        ),
    ] = None,
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run",
            "-n",
            help="Preview what would be generated without writing files",
        ),
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Show detailed progress"),
    ] = False,
) -> None:
    """Generate llms.txt and llms-full.txt from built MkDocs site."""
    # NOTE: the docstring above is what the command shows as its help text,
    # so implementation notes are kept in comments.
    #
    # Exits with status 1 when the config or site directory is missing, the
    # config cannot be loaded, no sections are configured, ``full_output`` is
    # not a safe relative path, or writing any output fails.

    # Outputs land next to the built site unless --output-dir is given.
    out_dir = output_dir or site_dir
    log, log_verbose = _make_logger(quiet, verbose)

    # Validate inputs
    if not config.exists():
        log(f"Error: Config file not found: {config}", color="red", err=True)
        raise typer.Exit(1)

    if not site_dir.exists():
        log(f"Error: Site directory not found: {site_dir}", color="red", err=True)
        log(
            "Hint: Run 'mkdocs build' first to generate the HTML documentation.",
            color="yellow",
            err=True,
        )
        raise typer.Exit(1)

    # Load config
    try:
        cfg = load_config(config)
    except (FileNotFoundError, ValueError, yaml.YAMLError) as e:
        log(f"Error loading config: {e}", color="red", err=True)
        raise typer.Exit(1) from None

    # Validate sections
    if not cfg.sections:
        log("Error: No sections configured.", color="red", err=True)
        log(
            "Add a 'nav' to your mkdocs.yml, or configure 'sections' "
            "in the llmstxt plugin.",
            color="yellow",
            err=True,
        )
        raise typer.Exit(1)

    # Check the full-output name before anything is generated or written, so
    # an invalid value cannot leave a half-populated output directory behind.
    try:
        full_output_path = ensure_safe_md_path(cfg.full_output)
    except ValueError:
        log(
            "Error: Invalid full_output: must be a relative path without '..'",
            color="red",
            err=True,
        )
        raise typer.Exit(1) from None
    llms_path = out_dir / "llms.txt"
    full_path = out_dir / full_output_path

    log_verbose(f"Site: {cfg.site_name}")
    log_verbose(f"Sections: {list(cfg.sections.keys())}")
    if dry_run:
        log_verbose("Dry run - no files will be written")

    # Generate content
    llms_build = build_llms_output(
        config=cfg,
        site_dir=site_dir,
    )
    try:
        markdown_files = write_markdown_files(
            llms_build.pages,
            output_dir=out_dir,
            use_directory_urls=cfg.use_directory_urls,
            dry_run=dry_run,
        )
    except (OSError, ValueError) as exc:
        log(f"Error writing markdown files: {exc}", color="red", err=True)
        raise typer.Exit(1) from None

    # The summary reports bytes, so measure the UTF-8 encoding and not the
    # number of characters (they differ for any non-ASCII content).
    llms_size = len(llms_build.llms_txt.encode("utf-8"))
    full_size = len(llms_build.llms_full_txt.encode("utf-8"))

    # Write output files (skip in dry-run mode)
    if dry_run:
        action = "Would generate"
        color = "yellow"
    else:
        action = "Generated"
        color = "green"
        try:
            out_dir.mkdir(parents=True, exist_ok=True)
            # full_output may name a file inside a sub-directory.
            full_path.parent.mkdir(parents=True, exist_ok=True)
            llms_path.write_text(llms_build.llms_txt, encoding="utf-8")
            full_path.write_text(llms_build.llms_full_txt, encoding="utf-8")
        except OSError as exc:
            log(f"Error writing output files: {exc}", color="red", err=True)
            raise typer.Exit(1) from None

    log(f"{action} {llms_path} ({llms_size:,} bytes)", color)
    log(f"{action} {full_path} ({full_size:,} bytes)", color)
    log(f"{action} {len(markdown_files)} markdown files", color)

    # Skipped pages are detail (verbose only); warnings are always shown
    # unless --quiet is set.
    if llms_build.skipped:
        log_verbose("Skipped files:", color="yellow", err=True)
        for path, reason in llms_build.skipped:
            log_verbose(f"- {path} ({reason})", color="yellow", err=True)

    if llms_build.warnings:
        log("Warnings:", color="yellow", err=True)
        for warning in llms_build.warnings:
            log(f"- {warning}", color="yellow", err=True)
210
+
211
+
212
@app.command()
def init(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
    ] = Path("mkdocs.yml"),
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Overwrite existing llmstxt section"),
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Show detailed progress"),
    ] = False,
) -> None:
    """Add llmstxt plugin config to mkdocs.yml."""
    # NOTE: the docstring above is what the command shows as its help text,
    # so implementation notes are kept in comments.
    #
    # Exits with status 1 when the config file is missing, unreadable, not
    # valid YAML, not a mapping, has a malformed 'plugins' value, already
    # contains an llmstxt entry (without --force), or cannot be written.
    import io  # local import: only this command renders YAML in memory

    log, log_verbose = _make_logger(quiet, verbose)

    if not config.exists():
        log(f"Error: Config file not found: {config}", color="red", err=True)
        log(
            "Create one first or specify path with --config.",
            color="yellow",
            err=True,
        )
        raise typer.Exit(1)

    yaml_parser = YAML()
    yaml_parser.preserve_quotes = True

    try:
        with open(config, encoding="utf-8") as f:
            data = yaml_parser.load(f)
    except RuamelYAMLError as e:
        log(f"Error: Invalid YAML: {e}", color="red", err=True)
        raise typer.Exit(1) from None
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, unreadable, or not UTF-8 text.
        log(f"Error: Could not read {config}: {e}", color="red", err=True)
        raise typer.Exit(1) from None

    if data is None:
        # An empty file parses to None; treat it as an empty mapping.
        data = {}
    if not isinstance(data, dict):
        log(
            "Error: mkdocs.yml must contain a mapping at the top level.",
            color="red",
            err=True,
        )
        raise typer.Exit(1)

    # Check for existing llmstxt plugin
    plugins = data.get("plugins", [])
    if plugins is None:
        plugins = []
    if not isinstance(plugins, (list, dict)):
        log(
            "Error: 'plugins' must be a list or mapping in mkdocs.yml.",
            color="red",
            err=True,
        )
        raise typer.Exit(1)
    data["plugins"] = plugins

    # 'plugins' is either a list of names / single-key mappings, or a mapping
    # keyed by plugin name.
    if isinstance(plugins, list):
        has_llmstxt = any(
            p == "llmstxt" or (isinstance(p, dict) and "llmstxt" in p) for p in plugins
        )
    else:
        has_llmstxt = "llmstxt" in plugins

    if has_llmstxt and not force:
        log("Error: llmstxt plugin already configured.", color="red", err=True)
        log(
            "Use --force to overwrite existing configuration.",
            color="yellow",
            err=True,
        )
        raise typer.Exit(1)

    # Remove existing llmstxt if force is set
    if has_llmstxt and force:
        if isinstance(plugins, list):
            plugins = [
                p
                for p in plugins
                if p != "llmstxt" and not (isinstance(p, dict) and "llmstxt" in p)
            ]
            data["plugins"] = plugins
        else:
            del plugins["llmstxt"]

    # Add an empty llmstxt entry; the commented example is spliced into the
    # rendered text below.
    if isinstance(data["plugins"], list):
        data["plugins"].append({"llmstxt": {}})
    else:
        # Preserve dict-style plugins
        data["plugins"]["llmstxt"] = {}

    # Render in memory so the file is written exactly once, after the
    # comments have been added.
    buffer = io.StringIO()
    yaml_parser.dump(data, buffer)
    content = buffer.getvalue()
    ends_with_newline = content.endswith("\n")

    # Example configuration, nested so that it is valid YAML once the
    # leading "# " is removed from each line.
    commented_example_lines = [
        "# markdown_description: |",
        "#   Additional context for LLMs.",
        "# sections:",
        "#   Getting Started:",
        "#     - index.md",
    ]

    def _comment_indent(line: str) -> int:
        # Children of "llmstxt:" sit two columns past the key; in list form
        # the key itself starts two columns after the "- " marker.
        leading = len(line) - len(line.lstrip(" "))
        if line.lstrip().startswith("- "):
            return leading + 4
        return leading + 2

    def _format_commented_example(indent: int) -> list[str]:
        prefix = " " * indent
        return [f"{prefix}{line}" for line in commented_example_lines]

    # Look for the llmstxt entry and add commented example below it
    new_lines: list[str] = []
    inserted = False
    for line in content.splitlines():
        stripped = line.strip()
        # List-style entries are dumped as "- llmstxt: {}"; drop the marker
        # before comparing so both plugin layouts are recognised.
        entry = stripped[2:].lstrip() if stripped.startswith("- ") else stripped
        if not inserted and entry in ("llmstxt: {}", "llmstxt:"):
            new_lines.append(line.replace("llmstxt: {}", "llmstxt:"))
            new_lines.extend(_format_commented_example(_comment_indent(line)))
            inserted = True
            continue
        new_lines.append(line)
    content = "\n".join(new_lines)
    if ends_with_newline:
        content += "\n"

    # Write the file
    try:
        config.write_text(content, encoding="utf-8")
    except PermissionError:
        log(f"Error: Permission denied writing to {config}", color="red", err=True)
        raise typer.Exit(1) from None
    except OSError as e:
        log(f"Error: Could not write {config}: {e}", color="red", err=True)
        raise typer.Exit(1) from None

    log(f"Added llmstxt plugin to {config}")
    log_verbose(
        "Configuration includes commented example for sections and markdown_description"
    )
376
+
377
+
378
@app.command()
def validate(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
    ] = Path("mkdocs.yml"),
    quiet: Annotated[
        bool,
        typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Show detailed config information"),
    ] = False,
) -> None:
    """Check config file validity."""
    # NOTE: the docstring above is what the command shows as its help text,
    # so implementation notes are kept in comments.
    log, log_verbose = _make_logger(quiet, verbose)

    try:
        cfg = load_config(config)
    except (FileNotFoundError, ValueError, yaml.YAMLError) as exc:
        # A missing file gets a fixed description; every other failure
        # reports the exception text.
        if isinstance(exc, FileNotFoundError):
            detail = f"File not found: {config}"
        else:
            detail = str(exc)
        log(f"Config invalid: {config}", color="red", err=True)
        log(f" Error: {detail}", color="red", err=True)
        raise typer.Exit(1) from None

    page_count = 0
    for section_pages in cfg.sections.values():
        page_count += len(section_pages)

    summary = (
        f"Config valid: {config}",
        f" Site: {cfg.site_name}",
        f" Sections: {len(cfg.sections)}",
        f" Pages: {page_count}",
    )
    for summary_line in summary:
        log(summary_line)

    # Verbose: show section details
    for title, section_pages in cfg.sections.items():
        log_verbose(f" {title}: {len(section_pages)} pages")
        for entry in section_pages:
            log_verbose(f" - {entry}")
419
+
420
+
421
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
@@ -0,0 +1,172 @@
1
+ """Configuration loading from mkdocs.yml."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import yaml
9
+ from pydantic import BaseModel, Field, ValidationError, field_validator
10
+
11
+ from llmstxt_standalone.config.derive import nav_to_sections
12
+ from llmstxt_standalone.config.model import Config
13
+ from llmstxt_standalone.config.plugin import get_llmstxt_config
14
+
15
# Fallback values used when mkdocs.yml does not provide the setting.
DEFAULT_SITE_NAME = "Documentation"
DEFAULT_FULL_OUTPUT = "llms-full.txt"
17
+
18
+
19
class _PermissiveLoader(yaml.SafeLoader):
    """SafeLoader that tolerates Python-specific YAML tags.

    MkDocs extensions like pymdownx.slugs use Python-specific YAML tags
    like !python/object/apply which SafeLoader rejects. With the
    multi-constructors registered on this class at module level, such
    nodes are replaced by a placeholder string so the rest of the config
    can still be parsed.
    """
26
+
27
+
28
+ def _ignore_unknown(loader: yaml.Loader, tag_suffix: str, node: yaml.Node) -> str:
29
+ """Return the raw tag as a placeholder string."""
30
+ return f"<{node.tag}>"
31
+
32
+
33
# Register handler for all Python tags (both full and shorthand forms).
# A multi-constructor matches every tag that starts with the given prefix,
# so each such node is built by _ignore_unknown.
_PermissiveLoader.add_multi_constructor("tag:yaml.org,2002:python/", _ignore_unknown)
_PermissiveLoader.add_multi_constructor("!python/", _ignore_unknown)
36
+
37
+
38
class LlmstxtPluginConfig(BaseModel):
    """Pydantic model for llmstxt plugin configuration."""

    # Every option is optional; an empty plugin entry yields these defaults.
    markdown_description: str = ""
    full_output: str = DEFAULT_FULL_OUTPUT
    content_selector: str | None = None
    sections: dict[str, list[str]] = Field(default_factory=dict)

    @field_validator("sections", mode="before")
    @classmethod
    def validate_sections(cls, v: Any) -> dict[str, list[str]]:
        """Check the raw ``sections`` value before pydantic parses it.

        ``None`` becomes an empty mapping. Anything else must be a mapping
        from string section names to lists of strings; the first violation
        found raises ``ValueError`` with a message naming the offending part.
        """
        if v is None:
            return {}
        if not isinstance(v, dict):
            raise ValueError(f"'sections' must be a mapping, got {type(v).__name__}")

        for name, entries in v.items():
            if not isinstance(name, str):
                raise ValueError(
                    f"'sections' keys must be strings, got {type(name).__name__}"
                )
            if not isinstance(entries, list):
                raise ValueError(
                    f"'sections.{name}' must be a list of strings, "
                    f"got {type(entries).__name__}"
                )
            non_strings = [item for item in entries if not isinstance(item, str)]
            if non_strings:
                # Report the first offending entry.
                raise ValueError(
                    f"'sections.{name}' entries must be strings, "
                    f"got {type(non_strings[0]).__name__}"
                )
        return v
71
+
72
+
73
class MkDocsConfig(BaseModel):
    """Pydantic model for mkdocs.yml top-level fields we care about."""

    site_name: str = DEFAULT_SITE_NAME
    site_description: str = ""
    site_url: str = ""
    nav: list[Any] = Field(default_factory=list)
    use_directory_urls: bool = True

    # The "before" validators below map an explicit null in the YAML to the
    # same value the field would have had if the key were absent.

    @field_validator("site_name", mode="before")
    @classmethod
    def coerce_site_name(cls, v: Any) -> str:
        """Replace a null site name with the default name."""
        if v is None:
            return DEFAULT_SITE_NAME
        return v

    @field_validator("site_description", "site_url", mode="before")
    @classmethod
    def coerce_str_fields(cls, v: Any) -> str:
        """Replace a null string field with the empty string."""
        if v is None:
            return ""
        return v

    @field_validator("nav", mode="before")
    @classmethod
    def coerce_nav(cls, v: Any) -> list[Any]:
        """Replace a null nav with an empty list."""
        if v is None:
            return []
        return v

    @field_validator("site_url", mode="after")
    @classmethod
    def strip_trailing_slash(cls, v: str) -> str:
        """Drop any trailing slashes from the validated site URL."""
        return v.rstrip("/")
105
+
106
+
107
def load_config(config_path: Path) -> Config:
    """Load and resolve configuration from mkdocs.yml.

    Args:
        config_path: Path to mkdocs.yml file.

    Returns:
        Resolved Config object.

    Raises:
        FileNotFoundError: If config file doesn't exist.
        ValueError: If the path is not a regular file, the YAML is nested
            too deeply to parse, the document is not a mapping, or the
            config is otherwise invalid.
    """
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")
    if not config_path.is_file():
        # Without this check a directory would reach open() and escape as an
        # OSError that is not part of this function's documented errors.
        raise ValueError(f"Config path is not a file: {config_path}")

    try:
        with open(config_path, encoding="utf-8") as f:
            # _PermissiveLoader derives from SafeLoader, so no arbitrary
            # Python objects are constructed from the file.
            raw = yaml.load(f, Loader=_PermissiveLoader)
    except RecursionError:
        raise ValueError(
            f"Config file has nav structure too deeply nested: {config_path}"
        ) from None

    # An empty file parses to None, which is rejected here as well.
    if not isinstance(raw, dict):
        raise ValueError(f"Config file must be a mapping: {config_path}")

    return _config_from_mkdocs(raw)
135
+
136
+
137
def _config_from_mkdocs(raw: dict[str, Any]) -> Config:
    """Turn a parsed mkdocs.yml mapping into a resolved ``Config``.

    Site-level fields are validated first. When an llmstxt plugin entry is
    found its options are used; otherwise sections are derived from ``nav``
    and the remaining options take their defaults.

    Raises:
        ValueError: If either the site-level fields or the plugin options
            fail validation.
    """
    try:
        mkdocs = MkDocsConfig.model_validate(raw)
    except ValidationError as e:
        raise ValueError(str(e)) from None

    llmstxt_raw = get_llmstxt_config(raw)

    if llmstxt_raw is None:
        # No plugin entry: fall back to the navigation tree and defaults.
        plugin_options = {
            "sections": nav_to_sections(mkdocs.nav),
            "markdown_description": "",
            "full_output": DEFAULT_FULL_OUTPUT,
            "content_selector": None,
        }
    else:
        try:
            plugin = LlmstxtPluginConfig.model_validate(llmstxt_raw)
        except ValidationError as e:
            # Surface only the first error's message for cleaner output.
            raise ValueError(f"llmstxt {e.errors()[0]['msg']}") from None
        plugin_options = {
            "sections": plugin.sections,
            "markdown_description": plugin.markdown_description,
            "full_output": plugin.full_output,
            "content_selector": plugin.content_selector,
        }

    return Config(
        site_name=mkdocs.site_name,
        site_description=mkdocs.site_description,
        site_url=mkdocs.site_url,
        nav=mkdocs.nav,
        use_directory_urls=mkdocs.use_directory_urls,
        **plugin_options,
    )
@@ -2,12 +2,12 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from dataclasses import dataclass
6
5
  from typing import Any
7
6
 
7
+ from pydantic import BaseModel
8
8
 
9
- @dataclass
10
- class Config:
9
+
10
+ class Config(BaseModel):
11
11
  """Resolved configuration for llmstxt generation."""
12
12
 
13
13
  site_name: str
@@ -6,6 +6,11 @@ import mdformat
6
6
  from bs4 import BeautifulSoup, NavigableString, Tag
7
7
  from markdownify import ATX, MarkdownConverter
8
8
 
9
+ __all__ = [
10
+ "extract_title_from_html",
11
+ "html_to_markdown",
12
+ ]
13
+
9
14
 
10
15
  def _should_remove(tag: Tag) -> bool:
11
16
  """Check if a tag should be removed during autoclean."""
@@ -70,13 +75,14 @@ def _get_language(tag: Tag) -> str:
70
75
  return ""
71
76
 
72
77
 
73
- # Converter with mkdocs-llmstxt-compatible settings
74
- _converter = MarkdownConverter(
75
- bullets="-",
76
- code_language_callback=_get_language,
77
- escape_underscores=False,
78
- heading_style=ATX,
79
- )
78
def _make_converter() -> MarkdownConverter:
    """Return a new MarkdownConverter using mkdocs-llmstxt-compatible settings."""
    options = {
        "bullets": "-",
        "code_language_callback": _get_language,
        "escape_underscores": False,
        "heading_style": ATX,
    }
    return MarkdownConverter(**options)
80
86
 
81
87
 
82
88
  def extract_title_from_html(html: str, site_name: str | None = None) -> str | None:
@@ -154,5 +160,6 @@ def html_to_markdown(html: str, content_selector: str | None = None) -> str:
154
160
  return ""
155
161
 
156
162
  _autoclean(content)
157
- md = _converter.convert_soup(content)
163
+ converter = _make_converter()
164
+ md = converter.convert_soup(content)
158
165
  return mdformat.text(md, options={"wrap": "no"}, extensions=("tables",))
@@ -8,6 +8,19 @@ from pathlib import Path
8
8
  from llmstxt_standalone.config import Config
9
9
  from llmstxt_standalone.convert import extract_title_from_html, html_to_markdown
10
10
 
11
+ __all__ = [
12
+ "BuildResult",
13
+ "GenerateResult",
14
+ "PageMarkdown",
15
+ "build_llms_output",
16
+ "ensure_safe_md_path",
17
+ "generate_llms_txt",
18
+ "md_path_to_html_path",
19
+ "md_path_to_output_md_path",
20
+ "md_path_to_page_url",
21
+ "write_markdown_files",
22
+ ]
23
+
11
24
 
12
25
  def _escape_markdown_link_text(text: str) -> str:
13
26
  r"""Escape characters that break markdown link syntax.
@@ -32,7 +45,18 @@ def _is_index_md(md_path: str) -> bool:
32
45
  return md_path == "index.md" or md_path.endswith("/index.md")
33
46
 
34
47
 
35
- def _ensure_safe_md_path(md_path: str) -> Path:
48
+ def ensure_safe_md_path(md_path: str) -> Path:
49
+ """Validate and convert a markdown path to a safe Path object.
50
+
51
+ Args:
52
+ md_path: Relative markdown file path (e.g., "install.md").
53
+
54
+ Returns:
55
+ Path object for the markdown file.
56
+
57
+ Raises:
58
+ ValueError: If path is absolute or contains '..'.
59
+ """
36
60
  path = Path(md_path)
37
61
  if path.is_absolute() or path.drive:
38
62
  raise ValueError(f"Markdown path must be relative: {md_path}")
@@ -63,7 +87,7 @@ def md_path_to_html_path(
63
87
  Path to the corresponding HTML file.
64
88
  """
65
89
  # Handle index.md at any level (root or nested like foo/bar/index.md)
66
- safe_md_path = _ensure_safe_md_path(md_path)
90
+ safe_md_path = ensure_safe_md_path(md_path)
67
91
  if _is_index_md(md_path):
68
92
  html_path = site_dir / safe_md_path.with_suffix(".html")
69
93
  return _ensure_within_dir(site_dir, html_path, "HTML path")
@@ -117,7 +141,7 @@ def md_path_to_output_md_path(
117
141
  Path where the markdown file should be written.
118
142
  """
119
143
  # Handle index.md at any level (root or nested like foo/bar/index.md)
120
- safe_md_path = _ensure_safe_md_path(md_path)
144
+ safe_md_path = ensure_safe_md_path(md_path)
121
145
  if _is_index_md(md_path):
122
146
  output_path = site_dir / safe_md_path
123
147
  return _ensure_within_dir(site_dir, output_path, "Output path")
@@ -149,7 +173,12 @@ class BuildResult:
149
173
 
150
174
  @dataclass
151
175
  class GenerateResult:
152
- """Result of llms.txt generation with files written."""
176
+ """Result of llms.txt generation with files written.
177
+
178
+ Used by generate_llms_txt() for programmatic use cases that want
179
+ file writing handled automatically. The CLI uses BuildResult +
180
+ write_markdown_files() for more control over the write step.
181
+ """
153
182
 
154
183
  llms_txt: str
155
184
  llms_full_txt: str
@@ -1,172 +0,0 @@
1
- """Command-line interface."""
2
-
3
- from __future__ import annotations
4
-
5
- from pathlib import Path
6
- from typing import Annotated
7
-
8
- import typer
9
-
10
- from llmstxt_standalone import __version__
11
- from llmstxt_standalone.config import load_config
12
- from llmstxt_standalone.generate import build_llms_output, write_markdown_files
13
-
14
- app = typer.Typer(
15
- help="Generate llms.txt from built HTML documentation.",
16
- no_args_is_help=False,
17
- context_settings={"help_option_names": ["-h", "--help"]},
18
- )
19
-
20
-
21
- def version_callback(value: bool) -> None:
22
- """Print version and exit if --version flag is set."""
23
- if value:
24
- typer.echo(f"llmstxt-standalone {__version__}")
25
- raise typer.Exit()
26
-
27
-
28
- @app.command()
29
- def main(
30
- config: Annotated[
31
- Path,
32
- typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
33
- ] = Path("mkdocs.yml"),
34
- site_dir: Annotated[
35
- Path,
36
- typer.Option("--site-dir", "-s", help="Path to built HTML site directory"),
37
- ] = Path("site"),
38
- output_dir: Annotated[
39
- Path | None,
40
- typer.Option(
41
- "--output-dir", "-o", help="Output directory (defaults to site-dir)"
42
- ),
43
- ] = None,
44
- dry_run: Annotated[
45
- bool,
46
- typer.Option(
47
- "--dry-run",
48
- "-n",
49
- help="Preview what would be generated without writing files",
50
- ),
51
- ] = False,
52
- quiet: Annotated[
53
- bool,
54
- typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
55
- ] = False,
56
- verbose: Annotated[
57
- bool,
58
- typer.Option("--verbose", "-v", help="Show detailed progress"),
59
- ] = False,
60
- version: Annotated[
61
- bool,
62
- typer.Option(
63
- "--version",
64
- "-V",
65
- callback=version_callback,
66
- is_eager=True,
67
- help="Show version",
68
- ),
69
- ] = False,
70
- ) -> None:
71
- """Generate llms.txt and llms-full.txt from built HTML documentation."""
72
- # Resolve output directory
73
- out_dir = output_dir or site_dir
74
-
75
- # quiet overrides verbose
76
- if quiet:
77
- verbose = False
78
-
79
- def log(msg: str, color: str = "green", err: bool = False) -> None:
80
- if not quiet:
81
- typer.secho(msg, fg=color, err=err)
82
-
83
- # Validate inputs
84
- if not config.exists():
85
- typer.secho(f"Error: Config file not found: {config}", fg="red", err=True)
86
- raise typer.Exit(1)
87
-
88
- if not site_dir.exists():
89
- typer.secho(f"Error: Site directory not found: {site_dir}", fg="red", err=True)
90
- typer.secho(
91
- "Hint: Run 'mkdocs build' first to generate the HTML documentation.",
92
- fg="yellow",
93
- err=True,
94
- )
95
- raise typer.Exit(1)
96
-
97
- # Load config
98
- try:
99
- cfg = load_config(config)
100
- except Exception as e:
101
- typer.secho(f"Error loading config: {e}", fg="red", err=True)
102
- raise typer.Exit(1) from None
103
-
104
- # Validate sections
105
- if not cfg.sections:
106
- typer.secho("Error: No sections configured.", fg="red", err=True)
107
- typer.secho(
108
- "Add a 'nav' to your mkdocs.yml, or configure 'sections' "
109
- "in the llmstxt plugin.",
110
- fg="yellow",
111
- err=True,
112
- )
113
- raise typer.Exit(1)
114
-
115
- if verbose:
116
- typer.echo(f"Site: {cfg.site_name}")
117
- typer.echo(f"Sections: {list(cfg.sections.keys())}")
118
- if dry_run:
119
- typer.echo("Dry run - no files will be written")
120
-
121
- # Generate content
122
- build = build_llms_output(
123
- config=cfg,
124
- site_dir=site_dir,
125
- )
126
- try:
127
- markdown_files = write_markdown_files(
128
- build.pages,
129
- output_dir=out_dir,
130
- use_directory_urls=cfg.use_directory_urls,
131
- dry_run=dry_run,
132
- )
133
- except (OSError, ValueError) as exc:
134
- typer.secho(f"Error writing markdown files: {exc}", fg="red", err=True)
135
- raise typer.Exit(1) from None
136
-
137
- # Define output paths
138
- llms_path = out_dir / "llms.txt"
139
- full_path = out_dir / cfg.full_output
140
-
141
- # Write output files (skip in dry-run mode)
142
- if dry_run:
143
- action = "Would generate"
144
- color = "yellow"
145
- else:
146
- action = "Generated"
147
- color = "green"
148
- try:
149
- out_dir.mkdir(parents=True, exist_ok=True)
150
- llms_path.write_text(build.llms_txt, encoding="utf-8")
151
- full_path.write_text(build.llms_full_txt, encoding="utf-8")
152
- except OSError as exc:
153
- typer.secho(f"Error writing output files: {exc}", fg="red", err=True)
154
- raise typer.Exit(1) from None
155
-
156
- log(f"{action} {llms_path} ({len(build.llms_txt):,} bytes)", color)
157
- log(f"{action} {full_path} ({len(build.llms_full_txt):,} bytes)", color)
158
- log(f"{action} {len(markdown_files)} markdown files", color)
159
-
160
- if verbose and build.skipped:
161
- log("Skipped files:", color="yellow", err=True)
162
- for path, reason in build.skipped:
163
- log(f"- {path} ({reason})", color="yellow", err=True)
164
-
165
- if build.warnings:
166
- log("Warnings:", color="yellow", err=True)
167
- for warning in build.warnings:
168
- log(f"- {warning}", color="yellow", err=True)
169
-
170
-
171
- if __name__ == "__main__":
172
- app()
@@ -1,114 +0,0 @@
1
- """Configuration loading from mkdocs.yml."""
2
-
3
- from __future__ import annotations
4
-
5
- from pathlib import Path
6
- from typing import Any
7
-
8
- import yaml
9
-
10
- from llmstxt_standalone.config.derive import nav_to_sections
11
- from llmstxt_standalone.config.model import Config
12
- from llmstxt_standalone.config.plugin import get_llmstxt_config
13
-
14
- DEFAULT_SITE_NAME = "Documentation"
15
- DEFAULT_FULL_OUTPUT = "llms-full.txt"
16
-
17
-
18
- class _PermissiveLoader(yaml.SafeLoader):
19
- """SafeLoader that ignores unknown Python tags.
20
-
21
- MkDocs extensions like pymdownx.slugs use Python-specific YAML tags
22
- like !python/object/apply which SafeLoader rejects. This loader
23
- treats them as raw strings to allow parsing the rest of the config.
24
- """
25
-
26
-
27
- def _ignore_unknown(loader: yaml.Loader, tag_suffix: str, node: yaml.Node) -> str:
28
- """Return the raw tag as a placeholder string."""
29
- return f"<{node.tag}>"
30
-
31
-
32
- # Register handler for all Python tags (both full and shorthand forms)
33
- _PermissiveLoader.add_multi_constructor("tag:yaml.org,2002:python/", _ignore_unknown)
34
- _PermissiveLoader.add_multi_constructor("!python/", _ignore_unknown)
35
-
36
-
37
- def load_config(config_path: Path) -> Config:
38
- """Load and resolve configuration from mkdocs.yml.
39
-
40
- Args:
41
- config_path: Path to mkdocs.yml file.
42
-
43
- Returns:
44
- Resolved Config object.
45
-
46
- Raises:
47
- FileNotFoundError: If config file doesn't exist.
48
- """
49
- if not config_path.exists():
50
- raise FileNotFoundError(f"Config file not found: {config_path}")
51
-
52
- with open(config_path, encoding="utf-8") as f:
53
- raw = yaml.load(f, Loader=_PermissiveLoader)
54
-
55
- if not isinstance(raw, dict):
56
- raise ValueError(f"Config file must be a mapping: {config_path}")
57
-
58
- return _config_from_mkdocs(raw)
59
-
60
-
61
- def _config_from_mkdocs(raw: dict[str, Any]) -> Config:
62
- """Build a Config from a parsed mkdocs.yml mapping."""
63
- site_name = raw.get("site_name", DEFAULT_SITE_NAME)
64
- site_description = raw.get("site_description", "")
65
- site_url = raw.get("site_url", "").rstrip("/")
66
- nav = raw.get("nav", [])
67
- # MkDocs defaults use_directory_urls to true
68
- use_directory_urls = raw.get("use_directory_urls", True)
69
-
70
- llmstxt_config = get_llmstxt_config(raw)
71
-
72
- if llmstxt_config is not None:
73
- markdown_description = llmstxt_config.get("markdown_description", "")
74
- full_output = llmstxt_config.get("full_output", DEFAULT_FULL_OUTPUT)
75
- content_selector = llmstxt_config.get("content_selector")
76
- sections = llmstxt_config.get("sections", {})
77
- if not isinstance(sections, dict):
78
- raise ValueError(
79
- f"llmstxt 'sections' must be a mapping, got {type(sections).__name__}"
80
- )
81
- for section_name, pages in sections.items():
82
- if not isinstance(section_name, str):
83
- raise ValueError(
84
- "llmstxt 'sections' keys must be strings, "
85
- f"got {type(section_name).__name__}"
86
- )
87
- if not isinstance(pages, list):
88
- raise ValueError(
89
- f"llmstxt 'sections.{section_name}' must be a list of strings, "
90
- f"got {type(pages).__name__}"
91
- )
92
- for page in pages:
93
- if not isinstance(page, str):
94
- raise ValueError(
95
- f"llmstxt 'sections.{section_name}' entries must be strings, "
96
- f"got {type(page).__name__}"
97
- )
98
- else:
99
- markdown_description = ""
100
- full_output = DEFAULT_FULL_OUTPUT
101
- content_selector = None
102
- sections = nav_to_sections(nav)
103
-
104
- return Config(
105
- site_name=site_name,
106
- site_description=site_description,
107
- site_url=site_url,
108
- markdown_description=markdown_description,
109
- full_output=full_output,
110
- content_selector=content_selector,
111
- sections=sections,
112
- nav=nav,
113
- use_directory_urls=use_directory_urls,
114
- )