llmstxt-standalone 0.1.0__tar.gz → 0.1.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: llmstxt-standalone
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Generate llms.txt from built HTML documentation
5
5
  Keywords: llms,documentation,markdown,mkdocs
6
6
  Author: Shaan Majid
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "llmstxt-standalone"
3
- version = "0.1.0"
3
+ version = "0.1.1"
4
4
  description = "Generate llms.txt from built HTML documentation"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -10,10 +10,13 @@ def nav_to_sections(nav: list[Any]) -> dict[str, list[str]]:
10
10
  sections: dict[str, list[str]] = {}
11
11
 
12
12
  for item in nav:
13
- if isinstance(item, dict):
13
+ if isinstance(item, str):
14
+ # Bare top-level page: - index.md
15
+ sections.setdefault("Pages", []).append(item)
16
+ elif isinstance(item, dict):
14
17
  for key, value in item.items():
15
18
  if isinstance(value, str):
16
- # Top-level page, add to "Pages" section
19
+ # Top-level page with title: - Home: index.md
17
20
  sections.setdefault("Pages", []).append(value)
18
21
  elif isinstance(value, list):
19
22
  # Section with children
@@ -74,6 +74,27 @@ def _config_from_mkdocs(raw: dict[str, Any]) -> Config:
74
74
  full_output = llmstxt_config.get("full_output", DEFAULT_FULL_OUTPUT)
75
75
  content_selector = llmstxt_config.get("content_selector")
76
76
  sections = llmstxt_config.get("sections", {})
77
+ if not isinstance(sections, dict):
78
+ raise ValueError(
79
+ f"llmstxt 'sections' must be a mapping, got {type(sections).__name__}"
80
+ )
81
+ for section_name, pages in sections.items():
82
+ if not isinstance(section_name, str):
83
+ raise ValueError(
84
+ "llmstxt 'sections' keys must be strings, "
85
+ f"got {type(section_name).__name__}"
86
+ )
87
+ if not isinstance(pages, list):
88
+ raise ValueError(
89
+ f"llmstxt 'sections.{section_name}' must be a list of strings, "
90
+ f"got {type(pages).__name__}"
91
+ )
92
+ for page in pages:
93
+ if not isinstance(page, str):
94
+ raise ValueError(
95
+ f"llmstxt 'sections.{section_name}' entries must be strings, "
96
+ f"got {type(page).__name__}"
97
+ )
77
98
  else:
78
99
  markdown_description = ""
79
100
  full_output = DEFAULT_FULL_OUTPUT
@@ -34,9 +34,15 @@ def _autoclean(soup: BeautifulSoup | Tag) -> None:
34
34
  for element in soup.find_all("table", attrs={"class": "highlighttable"}):
35
35
  code = element.find("code")
36
36
  if code:
37
- element.replace_with(
38
- BeautifulSoup(f"<pre>{code.get_text()}</pre>", "html.parser")
37
+ # Find the root BeautifulSoup document to create new tags
38
+ # (soup parameter may be a Tag, which doesn't have new_tag)
39
+ doc = next(
40
+ (p for p in element.parents if isinstance(p, BeautifulSoup)), None
39
41
  )
42
+ if doc:
43
+ pre_tag = doc.new_tag("pre")
44
+ pre_tag.string = code.get_text()
45
+ element.replace_with(pre_tag)
40
46
 
41
47
 
42
48
  def _get_language(tag: Tag) -> str: