llmstxt-standalone 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.
llmstxt_standalone/cli.py CHANGED
@@ -2,22 +2,49 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from collections.abc import Callable
5
6
  from pathlib import Path
6
7
  from typing import Annotated
7
8
 
8
9
  import typer
10
+ import yaml
11
+ from ruamel.yaml import YAML
12
+ from ruamel.yaml import YAMLError as RuamelYAMLError
9
13
 
10
14
  from llmstxt_standalone import __version__
11
15
  from llmstxt_standalone.config import load_config
12
- from llmstxt_standalone.generate import build_llms_output, write_markdown_files
13
-
14
- app = typer.Typer(
15
- help="Generate llms.txt from built HTML documentation.",
16
- no_args_is_help=False,
17
- context_settings={"help_option_names": ["-h", "--help"]},
16
+ from llmstxt_standalone.generate import (
17
+ build_llms_output,
18
+ ensure_safe_md_path,
19
+ write_markdown_files,
18
20
  )
19
21
 
20
22
 
23
+ def _make_logger(
24
+ quiet: bool, verbose: bool = False
25
+ ) -> tuple[Callable[..., None], Callable[..., None]]:
26
+ """Create log and log_verbose functions for CLI output.
27
+
28
+ Args:
29
+ quiet: If True, suppress all output.
30
+ verbose: If True, enable verbose logging (quiet overrides this).
31
+
32
+ Returns:
33
+ Tuple of (log, log_verbose) functions.
34
+ """
35
+ effective_verbose = verbose and not quiet
36
+
37
+ def log(msg: str, color: str = "green", err: bool = False) -> None:
38
+ if not quiet:
39
+ typer.secho(msg, fg=color, err=err)
40
+
41
+ def log_verbose(msg: str, color: str = "green", err: bool = False) -> None:
42
+ if effective_verbose:
43
+ typer.secho(msg, fg=color, err=err)
44
+
45
+ return log, log_verbose
46
+
47
+
21
48
  def version_callback(value: bool) -> None:
22
49
  """Print version and exit if --version flag is set."""
23
50
  if value:
@@ -25,8 +52,31 @@ def version_callback(value: bool) -> None:
25
52
  raise typer.Exit()
26
53
 
27
54
 
28
- @app.command()
55
+ app = typer.Typer(
56
+ help="Generate llms.txt from built HTML documentation.",
57
+ no_args_is_help=True,
58
+ context_settings={"help_option_names": ["-h", "--help"]},
59
+ )
60
+
61
+
62
+ @app.callback(invoke_without_command=True)
29
63
  def main(
64
+ version: Annotated[
65
+ bool,
66
+ typer.Option(
67
+ "--version",
68
+ "-V",
69
+ callback=version_callback,
70
+ is_eager=True,
71
+ help="Show version and exit",
72
+ ),
73
+ ] = False,
74
+ ) -> None:
75
+ """Generate llms.txt from built HTML documentation."""
76
+
77
+
78
+ @app.command()
79
+ def build(
30
80
  config: Annotated[
31
81
  Path,
32
82
  typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
@@ -57,39 +107,22 @@ def main(
57
107
  bool,
58
108
  typer.Option("--verbose", "-v", help="Show detailed progress"),
59
109
  ] = False,
60
- version: Annotated[
61
- bool,
62
- typer.Option(
63
- "--version",
64
- "-V",
65
- callback=version_callback,
66
- is_eager=True,
67
- help="Show version",
68
- ),
69
- ] = False,
70
110
  ) -> None:
71
- """Generate llms.txt and llms-full.txt from built HTML documentation."""
111
+ """Generate llms.txt and llms-full.txt from built MkDocs site."""
72
112
  # Resolve output directory
73
113
  out_dir = output_dir or site_dir
74
-
75
- # quiet overrides verbose
76
- if quiet:
77
- verbose = False
78
-
79
- def log(msg: str, color: str = "green", err: bool = False) -> None:
80
- if not quiet:
81
- typer.secho(msg, fg=color, err=err)
114
+ log, log_verbose = _make_logger(quiet, verbose)
82
115
 
83
116
  # Validate inputs
84
117
  if not config.exists():
85
- typer.secho(f"Error: Config file not found: {config}", fg="red", err=True)
118
+ log(f"Error: Config file not found: {config}", color="red", err=True)
86
119
  raise typer.Exit(1)
87
120
 
88
121
  if not site_dir.exists():
89
- typer.secho(f"Error: Site directory not found: {site_dir}", fg="red", err=True)
90
- typer.secho(
122
+ log(f"Error: Site directory not found: {site_dir}", color="red", err=True)
123
+ log(
91
124
  "Hint: Run 'mkdocs build' first to generate the HTML documentation.",
92
- fg="yellow",
125
+ color="yellow",
93
126
  err=True,
94
127
  )
95
128
  raise typer.Exit(1)
@@ -97,46 +130,54 @@ def main(
97
130
  # Load config
98
131
  try:
99
132
  cfg = load_config(config)
100
- except Exception as e:
101
- typer.secho(f"Error loading config: {e}", fg="red", err=True)
133
+ except (FileNotFoundError, ValueError, yaml.YAMLError) as e:
134
+ log(f"Error loading config: {e}", color="red", err=True)
102
135
  raise typer.Exit(1) from None
103
136
 
104
137
  # Validate sections
105
138
  if not cfg.sections:
106
- typer.secho("Error: No sections configured.", fg="red", err=True)
107
- typer.secho(
139
+ log("Error: No sections configured.", color="red", err=True)
140
+ log(
108
141
  "Add a 'nav' to your mkdocs.yml, or configure 'sections' "
109
142
  "in the llmstxt plugin.",
110
- fg="yellow",
143
+ color="yellow",
111
144
  err=True,
112
145
  )
113
146
  raise typer.Exit(1)
114
147
 
115
- if verbose:
116
- typer.echo(f"Site: {cfg.site_name}")
117
- typer.echo(f"Sections: {list(cfg.sections.keys())}")
118
- if dry_run:
119
- typer.echo("Dry run - no files will be written")
148
+ log_verbose(f"Site: {cfg.site_name}")
149
+ log_verbose(f"Sections: {list(cfg.sections.keys())}")
150
+ if dry_run:
151
+ log_verbose("Dry run - no files will be written")
120
152
 
121
153
  # Generate content
122
- build = build_llms_output(
154
+ llms_build = build_llms_output(
123
155
  config=cfg,
124
156
  site_dir=site_dir,
125
157
  )
126
158
  try:
127
159
  markdown_files = write_markdown_files(
128
- build.pages,
160
+ llms_build.pages,
129
161
  output_dir=out_dir,
130
162
  use_directory_urls=cfg.use_directory_urls,
131
163
  dry_run=dry_run,
132
164
  )
133
165
  except (OSError, ValueError) as exc:
134
- typer.secho(f"Error writing markdown files: {exc}", fg="red", err=True)
166
+ log(f"Error writing markdown files: {exc}", color="red", err=True)
135
167
  raise typer.Exit(1) from None
136
168
 
137
169
  # Define output paths
138
170
  llms_path = out_dir / "llms.txt"
139
- full_path = out_dir / cfg.full_output
171
+ try:
172
+ full_output_path = ensure_safe_md_path(cfg.full_output)
173
+ except ValueError:
174
+ log(
175
+ "Error: Invalid full_output: must be a relative path without '..'",
176
+ color="red",
177
+ err=True,
178
+ )
179
+ raise typer.Exit(1) from None
180
+ full_path = out_dir / full_output_path
140
181
 
141
182
  # Write output files (skip in dry-run mode)
142
183
  if dry_run:
@@ -147,26 +188,235 @@ def main(
147
188
  color = "green"
148
189
  try:
149
190
  out_dir.mkdir(parents=True, exist_ok=True)
150
- llms_path.write_text(build.llms_txt, encoding="utf-8")
151
- full_path.write_text(build.llms_full_txt, encoding="utf-8")
191
+ llms_path.write_text(llms_build.llms_txt, encoding="utf-8")
192
+ full_path.write_text(llms_build.llms_full_txt, encoding="utf-8")
152
193
  except OSError as exc:
153
- typer.secho(f"Error writing output files: {exc}", fg="red", err=True)
194
+ log(f"Error writing output files: {exc}", color="red", err=True)
154
195
  raise typer.Exit(1) from None
155
196
 
156
- log(f"{action} {llms_path} ({len(build.llms_txt):,} bytes)", color)
157
- log(f"{action} {full_path} ({len(build.llms_full_txt):,} bytes)", color)
197
+ log(f"{action} {llms_path} ({len(llms_build.llms_txt):,} bytes)", color)
198
+ log(f"{action} {full_path} ({len(llms_build.llms_full_txt):,} bytes)", color)
158
199
  log(f"{action} {len(markdown_files)} markdown files", color)
159
200
 
160
- if verbose and build.skipped:
161
- log("Skipped files:", color="yellow", err=True)
162
- for path, reason in build.skipped:
163
- log(f"- {path} ({reason})", color="yellow", err=True)
201
+ if llms_build.skipped:
202
+ log_verbose("Skipped files:", color="yellow", err=True)
203
+ for path, reason in llms_build.skipped:
204
+ log_verbose(f"- {path} ({reason})", color="yellow", err=True)
164
205
 
165
- if build.warnings:
206
+ if llms_build.warnings:
166
207
  log("Warnings:", color="yellow", err=True)
167
- for warning in build.warnings:
208
+ for warning in llms_build.warnings:
168
209
  log(f"- {warning}", color="yellow", err=True)
169
210
 
170
211
 
212
+ @app.command()
213
+ def init(
214
+ config: Annotated[
215
+ Path,
216
+ typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
217
+ ] = Path("mkdocs.yml"),
218
+ force: Annotated[
219
+ bool,
220
+ typer.Option("--force", "-f", help="Overwrite existing llmstxt section"),
221
+ ] = False,
222
+ quiet: Annotated[
223
+ bool,
224
+ typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
225
+ ] = False,
226
+ verbose: Annotated[
227
+ bool,
228
+ typer.Option("--verbose", "-v", help="Show detailed progress"),
229
+ ] = False,
230
+ ) -> None:
231
+ """Add llmstxt plugin config to mkdocs.yml."""
232
+ log, log_verbose = _make_logger(quiet, verbose)
233
+
234
+ if not config.exists():
235
+ log(f"Error: Config file not found: {config}", color="red", err=True)
236
+ log(
237
+ "Create one first or specify path with --config.",
238
+ color="yellow",
239
+ err=True,
240
+ )
241
+ raise typer.Exit(1)
242
+
243
+ yaml_parser = YAML()
244
+ yaml_parser.preserve_quotes = True
245
+
246
+ try:
247
+ with open(config, encoding="utf-8") as f:
248
+ data = yaml_parser.load(f)
249
+ except RuamelYAMLError as e:
250
+ log(f"Error: Invalid YAML: {e}", color="red", err=True)
251
+ raise typer.Exit(1) from None
252
+
253
+ if data is None:
254
+ data = {}
255
+
256
+ # Check for existing llmstxt plugin
257
+ plugins = data.get("plugins", [])
258
+ if plugins is None:
259
+ plugins = []
260
+ if not isinstance(plugins, (list, dict)):
261
+ log(
262
+ "Error: 'plugins' must be a list or mapping in mkdocs.yml.",
263
+ color="red",
264
+ err=True,
265
+ )
266
+ raise typer.Exit(1)
267
+ data["plugins"] = plugins
268
+
269
+ if isinstance(plugins, list):
270
+ has_llmstxt = any(
271
+ p == "llmstxt" or (isinstance(p, dict) and "llmstxt" in p) for p in plugins
272
+ )
273
+ elif isinstance(plugins, dict):
274
+ has_llmstxt = "llmstxt" in plugins
275
+ else:
276
+ has_llmstxt = False
277
+
278
+ if has_llmstxt and not force:
279
+ log("Error: llmstxt plugin already configured.", color="red", err=True)
280
+ log(
281
+ "Use --force to overwrite existing configuration.",
282
+ color="yellow",
283
+ err=True,
284
+ )
285
+ raise typer.Exit(1)
286
+
287
+ # Remove existing llmstxt if force is set
288
+ if has_llmstxt and force:
289
+ if isinstance(plugins, list):
290
+ plugins = [
291
+ p
292
+ for p in plugins
293
+ if p != "llmstxt" and not (isinstance(p, dict) and "llmstxt" in p)
294
+ ]
295
+ data["plugins"] = plugins
296
+ elif isinstance(plugins, dict):
297
+ del plugins["llmstxt"]
298
+
299
+ # Create the llmstxt plugin entry with commented example
300
+ llmstxt_entry = {
301
+ "llmstxt": {
302
+ # We'll add comments after writing
303
+ }
304
+ }
305
+
306
+ if isinstance(data["plugins"], list):
307
+ data["plugins"].append(llmstxt_entry)
308
+ else:
309
+ # Preserve dict-style plugins
310
+ data["plugins"]["llmstxt"] = {}
311
+
312
+ # Write the file
313
+ try:
314
+ with open(config, "w", encoding="utf-8") as f:
315
+ yaml_parser.dump(data, f)
316
+ except PermissionError:
317
+ log(f"Error: Permission denied writing to {config}", color="red", err=True)
318
+ raise typer.Exit(1) from None
319
+
320
+ # Now add comments using string manipulation since ruamel.yaml comment API is complex
321
+ content = config.read_text(encoding="utf-8")
322
+ ends_with_newline = content.endswith("\n")
323
+
324
+ # Find the llmstxt entry and add commented example below it
325
+ commented_example_lines = [
326
+ "# markdown_description: |",
327
+ "# Additional context for LLMs.",
328
+ "# sections:",
329
+ "# Getting Started:",
330
+ "# - index.md",
331
+ ]
332
+
333
+ def _comment_indent(line: str) -> int:
334
+ leading = len(line) - len(line.lstrip(" "))
335
+ if line.lstrip().startswith("- "):
336
+ return leading + 4
337
+ return leading + 2
338
+
339
+ def _format_commented_example(indent: int) -> list[str]:
340
+ prefix = " " * indent
341
+ return [f"{prefix}{line}" for line in commented_example_lines]
342
+
343
+ # Look for the llmstxt entry and add commented example below it
344
+ lines = content.splitlines()
345
+ new_lines: list[str] = []
346
+ inserted = False
347
+ for line in lines:
348
+ stripped = line.strip()
349
+ if not inserted and stripped == "llmstxt: {}":
350
+ indent = _comment_indent(line)
351
+ new_lines.append(line.replace("llmstxt: {}", "llmstxt:"))
352
+ new_lines.extend(_format_commented_example(indent))
353
+ inserted = True
354
+ continue
355
+ if not inserted and stripped == "llmstxt:":
356
+ indent = _comment_indent(line)
357
+ new_lines.append(line)
358
+ new_lines.extend(_format_commented_example(indent))
359
+ inserted = True
360
+ continue
361
+ new_lines.append(line)
362
+ content = "\n".join(new_lines)
363
+ if ends_with_newline:
364
+ content += "\n"
365
+
366
+ try:
367
+ config.write_text(content, encoding="utf-8")
368
+ except PermissionError:
369
+ log(f"Error: Permission denied writing to {config}", color="red", err=True)
370
+ raise typer.Exit(1) from None
371
+
372
+ log(f"Added llmstxt plugin to {config}")
373
+ log_verbose(
374
+ "Configuration includes commented example for sections and markdown_description"
375
+ )
376
+
377
+
378
+ @app.command()
379
+ def validate(
380
+ config: Annotated[
381
+ Path,
382
+ typer.Option("--config", "-c", help="Path to mkdocs.yml config file"),
383
+ ] = Path("mkdocs.yml"),
384
+ quiet: Annotated[
385
+ bool,
386
+ typer.Option("--quiet", "-q", help="Suppress output (exit code only)"),
387
+ ] = False,
388
+ verbose: Annotated[
389
+ bool,
390
+ typer.Option("--verbose", "-v", help="Show detailed config information"),
391
+ ] = False,
392
+ ) -> None:
393
+ """Check config file validity."""
394
+ log, log_verbose = _make_logger(quiet, verbose)
395
+
396
+ try:
397
+ cfg = load_config(config)
398
+ except FileNotFoundError:
399
+ log(f"Config invalid: {config}", color="red", err=True)
400
+ log(f" Error: File not found: {config}", color="red", err=True)
401
+ raise typer.Exit(1) from None
402
+ except (ValueError, yaml.YAMLError) as e:
403
+ log(f"Config invalid: {config}", color="red", err=True)
404
+ log(f" Error: {e}", color="red", err=True)
405
+ raise typer.Exit(1) from None
406
+
407
+ total_pages = sum(len(pages) for pages in cfg.sections.values())
408
+
409
+ log(f"Config valid: {config}")
410
+ log(f" Site: {cfg.site_name}")
411
+ log(f" Sections: {len(cfg.sections)}")
412
+ log(f" Pages: {total_pages}")
413
+
414
+ # Verbose: show section details
415
+ for section_name, pages in cfg.sections.items():
416
+ log_verbose(f" {section_name}: {len(pages)} pages")
417
+ for page in pages:
418
+ log_verbose(f" - {page}")
419
+
420
+
171
421
  if __name__ == "__main__":
172
422
  app()
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import Any
7
7
 
8
8
  import yaml
9
+ from pydantic import BaseModel, Field, ValidationError, field_validator
9
10
 
10
11
  from llmstxt_standalone.config.derive import nav_to_sections
11
12
  from llmstxt_standalone.config.model import Config
@@ -34,6 +35,75 @@ _PermissiveLoader.add_multi_constructor("tag:yaml.org,2002:python/", _ignore_unk
34
35
  _PermissiveLoader.add_multi_constructor("!python/", _ignore_unknown)
35
36
 
36
37
 
38
+ class LlmstxtPluginConfig(BaseModel):
39
+ """Pydantic model for llmstxt plugin configuration."""
40
+
41
+ markdown_description: str = ""
42
+ full_output: str = DEFAULT_FULL_OUTPUT
43
+ content_selector: str | None = None
44
+ sections: dict[str, list[str]] = Field(default_factory=dict)
45
+
46
+ @field_validator("sections", mode="before")
47
+ @classmethod
48
+ def validate_sections(cls, v: Any) -> dict[str, list[str]]:
49
+ """Validate sections is a dict with string keys and list[str] values."""
50
+ if v is None:
51
+ return {}
52
+ if not isinstance(v, dict):
53
+ raise ValueError(f"'sections' must be a mapping, got {type(v).__name__}")
54
+ for section_name, pages in v.items():
55
+ if not isinstance(section_name, str):
56
+ raise ValueError(
57
+ f"'sections' keys must be strings, got {type(section_name).__name__}"
58
+ )
59
+ if not isinstance(pages, list):
60
+ raise ValueError(
61
+ f"'sections.{section_name}' must be a list of strings, "
62
+ f"got {type(pages).__name__}"
63
+ )
64
+ for page in pages:
65
+ if not isinstance(page, str):
66
+ raise ValueError(
67
+ f"'sections.{section_name}' entries must be strings, "
68
+ f"got {type(page).__name__}"
69
+ )
70
+ return v
71
+
72
+
73
+ class MkDocsConfig(BaseModel):
74
+ """Pydantic model for mkdocs.yml top-level fields we care about."""
75
+
76
+ site_name: str = DEFAULT_SITE_NAME
77
+ site_description: str = ""
78
+ site_url: str = ""
79
+ nav: list[Any] = Field(default_factory=list)
80
+ use_directory_urls: bool = True
81
+
82
+ @field_validator("site_name", mode="before")
83
+ @classmethod
84
+ def coerce_site_name(cls, v: Any) -> str:
85
+ """Coerce None to default."""
86
+ return v if v is not None else DEFAULT_SITE_NAME
87
+
88
+ @field_validator("site_description", "site_url", mode="before")
89
+ @classmethod
90
+ def coerce_str_fields(cls, v: Any) -> str:
91
+ """Coerce None to empty string."""
92
+ return v if v is not None else ""
93
+
94
+ @field_validator("nav", mode="before")
95
+ @classmethod
96
+ def coerce_nav(cls, v: Any) -> list[Any]:
97
+ """Coerce None to empty list."""
98
+ return v if v is not None else []
99
+
100
+ @field_validator("site_url", mode="after")
101
+ @classmethod
102
+ def strip_trailing_slash(cls, v: str) -> str:
103
+ """Remove trailing slash from site_url."""
104
+ return v.rstrip("/")
105
+
106
+
37
107
  def load_config(config_path: Path) -> Config:
38
108
  """Load and resolve configuration from mkdocs.yml.
39
109
 
@@ -45,12 +115,18 @@ def load_config(config_path: Path) -> Config:
45
115
 
46
116
  Raises:
47
117
  FileNotFoundError: If config file doesn't exist.
118
+ ValueError: If config is invalid.
48
119
  """
49
120
  if not config_path.exists():
50
121
  raise FileNotFoundError(f"Config file not found: {config_path}")
51
122
 
52
- with open(config_path, encoding="utf-8") as f:
53
- raw = yaml.load(f, Loader=_PermissiveLoader)
123
+ try:
124
+ with open(config_path, encoding="utf-8") as f:
125
+ raw = yaml.load(f, Loader=_PermissiveLoader)
126
+ except RecursionError:
127
+ raise ValueError(
128
+ f"Config file has nav structure too deeply nested: {config_path}"
129
+ ) from None
54
130
 
55
131
  if not isinstance(raw, dict):
56
132
  raise ValueError(f"Config file must be a mapping: {config_path}")
@@ -60,55 +136,37 @@ def load_config(config_path: Path) -> Config:
60
136
 
61
137
  def _config_from_mkdocs(raw: dict[str, Any]) -> Config:
62
138
  """Build a Config from a parsed mkdocs.yml mapping."""
63
- site_name = raw.get("site_name", DEFAULT_SITE_NAME)
64
- site_description = raw.get("site_description", "")
65
- site_url = raw.get("site_url", "").rstrip("/")
66
- nav = raw.get("nav", [])
67
- # MkDocs defaults use_directory_urls to true
68
- use_directory_urls = raw.get("use_directory_urls", True)
69
-
70
- llmstxt_config = get_llmstxt_config(raw)
71
-
72
- if llmstxt_config is not None:
73
- markdown_description = llmstxt_config.get("markdown_description", "")
74
- full_output = llmstxt_config.get("full_output", DEFAULT_FULL_OUTPUT)
75
- content_selector = llmstxt_config.get("content_selector")
76
- sections = llmstxt_config.get("sections", {})
77
- if not isinstance(sections, dict):
78
- raise ValueError(
79
- f"llmstxt 'sections' must be a mapping, got {type(sections).__name__}"
80
- )
81
- for section_name, pages in sections.items():
82
- if not isinstance(section_name, str):
83
- raise ValueError(
84
- "llmstxt 'sections' keys must be strings, "
85
- f"got {type(section_name).__name__}"
86
- )
87
- if not isinstance(pages, list):
88
- raise ValueError(
89
- f"llmstxt 'sections.{section_name}' must be a list of strings, "
90
- f"got {type(pages).__name__}"
91
- )
92
- for page in pages:
93
- if not isinstance(page, str):
94
- raise ValueError(
95
- f"llmstxt 'sections.{section_name}' entries must be strings, "
96
- f"got {type(page).__name__}"
97
- )
139
+ try:
140
+ mkdocs = MkDocsConfig.model_validate(raw)
141
+ except ValidationError as e:
142
+ raise ValueError(str(e)) from None
143
+
144
+ llmstxt_raw = get_llmstxt_config(raw)
145
+
146
+ if llmstxt_raw is not None:
147
+ try:
148
+ plugin = LlmstxtPluginConfig.model_validate(llmstxt_raw)
149
+ except ValidationError as e:
150
+ # Extract the core error message for cleaner output
151
+ raise ValueError(f"llmstxt {e.errors()[0]['msg']}") from None
152
+ sections = plugin.sections
153
+ markdown_description = plugin.markdown_description
154
+ full_output = plugin.full_output
155
+ content_selector = plugin.content_selector
98
156
  else:
157
+ sections = nav_to_sections(mkdocs.nav)
99
158
  markdown_description = ""
100
159
  full_output = DEFAULT_FULL_OUTPUT
101
160
  content_selector = None
102
- sections = nav_to_sections(nav)
103
161
 
104
162
  return Config(
105
- site_name=site_name,
106
- site_description=site_description,
107
- site_url=site_url,
163
+ site_name=mkdocs.site_name,
164
+ site_description=mkdocs.site_description,
165
+ site_url=mkdocs.site_url,
108
166
  markdown_description=markdown_description,
109
167
  full_output=full_output,
110
168
  content_selector=content_selector,
111
169
  sections=sections,
112
- nav=nav,
113
- use_directory_urls=use_directory_urls,
170
+ nav=mkdocs.nav,
171
+ use_directory_urls=mkdocs.use_directory_urls,
114
172
  )
@@ -2,12 +2,12 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from dataclasses import dataclass
6
5
  from typing import Any
7
6
 
7
+ from pydantic import BaseModel
8
8
 
9
- @dataclass
10
- class Config:
9
+
10
+ class Config(BaseModel):
11
11
  """Resolved configuration for llmstxt generation."""
12
12
 
13
13
  site_name: str
@@ -6,6 +6,11 @@ import mdformat
6
6
  from bs4 import BeautifulSoup, NavigableString, Tag
7
7
  from markdownify import ATX, MarkdownConverter
8
8
 
9
+ __all__ = [
10
+ "extract_title_from_html",
11
+ "html_to_markdown",
12
+ ]
13
+
9
14
 
10
15
  def _should_remove(tag: Tag) -> bool:
11
16
  """Check if a tag should be removed during autoclean."""
@@ -70,13 +75,14 @@ def _get_language(tag: Tag) -> str:
70
75
  return ""
71
76
 
72
77
 
73
- # Converter with mkdocs-llmstxt-compatible settings
74
- _converter = MarkdownConverter(
75
- bullets="-",
76
- code_language_callback=_get_language,
77
- escape_underscores=False,
78
- heading_style=ATX,
79
- )
78
+ def _make_converter() -> MarkdownConverter:
79
+ """Create a MarkdownConverter with mkdocs-llmstxt-compatible settings."""
80
+ return MarkdownConverter(
81
+ bullets="-",
82
+ code_language_callback=_get_language,
83
+ escape_underscores=False,
84
+ heading_style=ATX,
85
+ )
80
86
 
81
87
 
82
88
  def extract_title_from_html(html: str, site_name: str | None = None) -> str | None:
@@ -154,5 +160,6 @@ def html_to_markdown(html: str, content_selector: str | None = None) -> str:
154
160
  return ""
155
161
 
156
162
  _autoclean(content)
157
- md = _converter.convert_soup(content)
163
+ converter = _make_converter()
164
+ md = converter.convert_soup(content)
158
165
  return mdformat.text(md, options={"wrap": "no"}, extensions=("tables",))
@@ -8,6 +8,19 @@ from pathlib import Path
8
8
  from llmstxt_standalone.config import Config
9
9
  from llmstxt_standalone.convert import extract_title_from_html, html_to_markdown
10
10
 
11
+ __all__ = [
12
+ "BuildResult",
13
+ "GenerateResult",
14
+ "PageMarkdown",
15
+ "build_llms_output",
16
+ "ensure_safe_md_path",
17
+ "generate_llms_txt",
18
+ "md_path_to_html_path",
19
+ "md_path_to_output_md_path",
20
+ "md_path_to_page_url",
21
+ "write_markdown_files",
22
+ ]
23
+
11
24
 
12
25
  def _escape_markdown_link_text(text: str) -> str:
13
26
  r"""Escape characters that break markdown link syntax.
@@ -32,7 +45,18 @@ def _is_index_md(md_path: str) -> bool:
32
45
  return md_path == "index.md" or md_path.endswith("/index.md")
33
46
 
34
47
 
35
- def _ensure_safe_md_path(md_path: str) -> Path:
48
+ def ensure_safe_md_path(md_path: str) -> Path:
49
+ """Validate and convert a markdown path to a safe Path object.
50
+
51
+ Args:
52
+ md_path: Relative markdown file path (e.g., "install.md").
53
+
54
+ Returns:
55
+ Path object for the markdown file.
56
+
57
+ Raises:
58
+ ValueError: If path is absolute or contains '..'.
59
+ """
36
60
  path = Path(md_path)
37
61
  if path.is_absolute() or path.drive:
38
62
  raise ValueError(f"Markdown path must be relative: {md_path}")
@@ -63,7 +87,7 @@ def md_path_to_html_path(
63
87
  Path to the corresponding HTML file.
64
88
  """
65
89
  # Handle index.md at any level (root or nested like foo/bar/index.md)
66
- safe_md_path = _ensure_safe_md_path(md_path)
90
+ safe_md_path = ensure_safe_md_path(md_path)
67
91
  if _is_index_md(md_path):
68
92
  html_path = site_dir / safe_md_path.with_suffix(".html")
69
93
  return _ensure_within_dir(site_dir, html_path, "HTML path")
@@ -117,7 +141,7 @@ def md_path_to_output_md_path(
117
141
  Path where the markdown file should be written.
118
142
  """
119
143
  # Handle index.md at any level (root or nested like foo/bar/index.md)
120
- safe_md_path = _ensure_safe_md_path(md_path)
144
+ safe_md_path = ensure_safe_md_path(md_path)
121
145
  if _is_index_md(md_path):
122
146
  output_path = site_dir / safe_md_path
123
147
  return _ensure_within_dir(site_dir, output_path, "Output path")
@@ -149,7 +173,12 @@ class BuildResult:
149
173
 
150
174
  @dataclass
151
175
  class GenerateResult:
152
- """Result of llms.txt generation with files written."""
176
+ """Result of llms.txt generation with files written.
177
+
178
+ Used by generate_llms_txt() for programmatic use cases that want
179
+ file writing handled automatically. The CLI uses BuildResult +
180
+ write_markdown_files() for more control over the write step.
181
+ """
153
182
 
154
183
  llms_txt: str
155
184
  llms_full_txt: str
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: llmstxt-standalone
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Generate llms.txt from built HTML documentation
5
5
  Keywords: llms,documentation,markdown,mkdocs
6
6
  Author: Shaan Majid
@@ -20,10 +20,12 @@ Classifier: Topic :: Documentation
20
20
  Classifier: Typing :: Typed
21
21
  Requires-Dist: typer>=0.9.0
22
22
  Requires-Dist: pyyaml>=6.0
23
+ Requires-Dist: ruamel-yaml>=0.18
23
24
  Requires-Dist: beautifulsoup4>=4.12
24
25
  Requires-Dist: markdownify>=0.14,<2.0
25
26
  Requires-Dist: mdformat>=0.7,<2.0
26
27
  Requires-Dist: mdformat-tables>=1.0
28
+ Requires-Dist: pydantic>=2.12.5
27
29
  Requires-Python: >=3.10
28
30
  Project-URL: Repository, https://github.com/shaanmajid/llmstxt-standalone
29
31
  Project-URL: Issues, https://github.com/shaanmajid/llmstxt-standalone/issues
@@ -59,21 +61,25 @@ uv add llmstxt-standalone # or: pip install
59
61
 
60
62
  ## Usage
61
63
 
64
+ ### build
65
+
66
+ Generate llms.txt from a built MkDocs site:
67
+
62
68
  ```bash
63
69
  # Run from project root (expects mkdocs.yml and site/)
64
- llmstxt-standalone
70
+ llmstxt-standalone build
65
71
 
66
72
  # Explicit paths
67
- llmstxt-standalone --config mkdocs.yml --site-dir ./build --output-dir ./dist
73
+ llmstxt-standalone build --config mkdocs.yml --site-dir ./build --output-dir ./dist
68
74
 
69
75
  # Preview without writing files
70
- llmstxt-standalone --dry-run
76
+ llmstxt-standalone build --dry-run
71
77
 
72
78
  # Suppress output
73
- llmstxt-standalone --quiet
79
+ llmstxt-standalone build --quiet
74
80
 
75
81
  # Show detailed progress
76
- llmstxt-standalone --verbose
82
+ llmstxt-standalone build --verbose
77
83
  ```
78
84
 
79
85
  | Option | Short | Default | Description |
@@ -84,11 +90,65 @@ llmstxt-standalone --verbose
84
90
  | `--dry-run` | `-n` | | Preview without writing |
85
91
  | `--quiet` | `-q` | | Suppress output |
86
92
  | `--verbose` | `-v` | | Show detailed progress |
87
- | `--version` | `-V` | | Show version |
93
+
94
+ ### init
95
+
96
+ Add llmstxt plugin configuration to an existing mkdocs.yml:
97
+
98
+ ```bash
99
+ llmstxt-standalone init
100
+
101
+ # Specify config path
102
+ llmstxt-standalone init --config path/to/mkdocs.yml
103
+
104
+ # Overwrite existing llmstxt config
105
+ llmstxt-standalone init --force
106
+
107
+ # Show detailed progress
108
+ llmstxt-standalone init --verbose
109
+ ```
110
+
111
+ | Option | Short | Description |
112
+ |--------|-------|-------------|
113
+ | `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
114
+ | `--force` | `-f` | Overwrite existing llmstxt section |
115
+ | `--quiet` | `-q` | Suppress output |
116
+ | `--verbose` | `-v` | Show detailed progress |
117
+
118
+ ### validate
119
+
120
+ Check that a config file is valid:
121
+
122
+ ```bash
123
+ $ llmstxt-standalone validate
124
+ Config valid: mkdocs.yml
125
+ Site: My Project
126
+ Sections: 3
127
+ Pages: 12
128
+
129
+ # Exit code only (for scripts)
130
+ llmstxt-standalone validate --quiet
131
+
132
+ # Show section details
133
+ llmstxt-standalone validate --verbose
134
+ ```
135
+
136
+ | Option | Short | Description |
137
+ |--------|-------|-------------|
138
+ | `--config` | `-c` | Path to mkdocs.yml (default: mkdocs.yml) |
139
+ | `--quiet` | `-q` | Suppress output |
140
+ | `--verbose` | `-v` | Show detailed config information |
141
+
142
+ ### Global options
143
+
144
+ ```bash
145
+ llmstxt-standalone --version # Show version
146
+ llmstxt-standalone --help # Show available commands
147
+ ```
88
148
 
89
149
  ## Output
90
150
 
91
- The tool generates three outputs:
151
+ The `build` command generates three outputs:
92
152
 
93
153
  1. `llms.txt` — an index file with markdown links to all pages
94
154
  1. `llms-full.txt` — concatenated content of all pages
@@ -0,0 +1,15 @@
1
+ llmstxt_standalone/__init__.py,sha256=AaGtXJrKNfa-wiDIRdOhqHyu9qtkXIOACn-xxiiNK2Q,160
2
+ llmstxt_standalone/__main__.py,sha256=yHZXO67IkwE1cr6iTo5AezwfYtsQcOQqXEfvMxm4_1Q,131
3
+ llmstxt_standalone/cli.py,sha256=mEHQ5XNcdmeoHRbnj3-qko0dpjFNCiGjXOB6UQ14vyo,13081
4
+ llmstxt_standalone/config/__init__.py,sha256=Vu2xQjxG-xe1YYb5JvnZFzwe6Rb5upEewi7UvLgYvQ8,188
5
+ llmstxt_standalone/config/derive.py,sha256=Mfwgsac786ARyNcElrFWp5piQUgusC1tdlvspxbK4Vw,1411
6
+ llmstxt_standalone/config/load.py,sha256=SvBdhHRDyMdmPUrUs2yqNVwbW7bVTI3ENvFLDJdi1uw,5845
7
+ llmstxt_standalone/config/model.py,sha256=1AeiG4zfxkrvyD_ggweHSeHb1Jt1mytQyU12DltAtks,2152
8
+ llmstxt_standalone/config/plugin.py,sha256=FoebHUfbl9M1KGcRyiQRIOCRkMek7LPcPxV_1nRrqqo,1175
9
+ llmstxt_standalone/convert.py,sha256=Otowo33aNPVYb7NUHxlwdCDRsalfqT8dLzOm1Z6jRbA,4930
10
+ llmstxt_standalone/generate.py,sha256=XQ1Dsf7W4FFFMlNs3QdJgQskVwW-W6UUPQQWHY-SEG8,11835
11
+ llmstxt_standalone/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ llmstxt_standalone-0.2.0.dist-info/WHEEL,sha256=e_m4S054HL0hyR3CpOk-b7Q7fDX6BuFkgL5OjAExXas,80
13
+ llmstxt_standalone-0.2.0.dist-info/entry_points.txt,sha256=Hy5G1GIRPUMgcHrUuxnu_D2xWJQ2svUPgNI6kuJl4hA,67
14
+ llmstxt_standalone-0.2.0.dist-info/METADATA,sha256=BQg0HRmqdwejF4m3XhzywRwWtqnyFzhMhE8uFiriuVk,7897
15
+ llmstxt_standalone-0.2.0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- llmstxt_standalone/__init__.py,sha256=AaGtXJrKNfa-wiDIRdOhqHyu9qtkXIOACn-xxiiNK2Q,160
2
- llmstxt_standalone/__main__.py,sha256=yHZXO67IkwE1cr6iTo5AezwfYtsQcOQqXEfvMxm4_1Q,131
3
- llmstxt_standalone/cli.py,sha256=oufJK-RS0enSd709ldtCh_OgSH5LIXb_vvdElm9a8DM,5193
4
- llmstxt_standalone/config/__init__.py,sha256=Vu2xQjxG-xe1YYb5JvnZFzwe6Rb5upEewi7UvLgYvQ8,188
5
- llmstxt_standalone/config/derive.py,sha256=Mfwgsac786ARyNcElrFWp5piQUgusC1tdlvspxbK4Vw,1411
6
- llmstxt_standalone/config/load.py,sha256=R3Am4gGesZRWt_oYQ1T6lStKdN9ShX4ilOIu35bZnWE,3981
7
- llmstxt_standalone/config/model.py,sha256=vkN5BPajYlwT_hYTVnf5b196SMNaFreYUJgEzwhH9l4,2154
8
- llmstxt_standalone/config/plugin.py,sha256=FoebHUfbl9M1KGcRyiQRIOCRkMek7LPcPxV_1nRrqqo,1175
9
- llmstxt_standalone/convert.py,sha256=f2iYtnNJYPCJpMPxQMkDonPhvqXssx-z3cU7_KnEUIw,4739
10
- llmstxt_standalone/generate.py,sha256=N-ryfHouG9ne28MRgapS2IZXnlJSad0eVNQxg0YPPGQ,11081
11
- llmstxt_standalone/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- llmstxt_standalone-0.1.1.dist-info/WHEEL,sha256=e_m4S054HL0hyR3CpOk-b7Q7fDX6BuFkgL5OjAExXas,80
13
- llmstxt_standalone-0.1.1.dist-info/entry_points.txt,sha256=Hy5G1GIRPUMgcHrUuxnu_D2xWJQ2svUPgNI6kuJl4hA,67
14
- llmstxt_standalone-0.1.1.dist-info/METADATA,sha256=p9VCqI8ha-eDvViv2Dsz3vNGSuzms8vwLC1RE3ZOXng,6530
15
- llmstxt_standalone-0.1.1.dist-info/RECORD,,