mkdocs-ultralytics-plugin 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17) hide show
  1. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/PKG-INFO +7 -5
  2. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/README.md +6 -4
  3. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +7 -5
  4. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/plugin/__init__.py +1 -1
  5. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/plugin/main.py +25 -1
  6. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/plugin/postprocess.py +121 -4
  7. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/plugin/processor.py +25 -47
  8. mkdocs_ultralytics_plugin-0.2.5/plugin/utils.py +262 -0
  9. mkdocs_ultralytics_plugin-0.2.3/plugin/utils.py +0 -236
  10. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/LICENSE +0 -0
  11. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
  12. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
  13. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
  14. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
  15. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
  16. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/pyproject.toml +0 -0
  17. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -45,7 +45,7 @@ Dynamic: license-file
45
45
 
46
46
  # 🚀 MkDocs Ultralytics Plugin
47
47
 
48
- Welcome to the MkDocs Ultralytics Plugin! 📄 This powerful tool enhances your [MkDocs](https://www.mkdocs.org/), [Zensical](https://zensical.com/), or any static site documentation with advanced Search Engine Optimization (SEO) features, interactive social elements, and structured data support. It automates the generation of essential meta tags, incorporates social sharing capabilities, and adds [JSON-LD](https://json-ld.org/) structured data to elevate user engagement and improve your documentation's visibility on the web.
48
+ Welcome to the MkDocs Ultralytics Plugin! 📄 This powerful tool enhances your [MkDocs](https://www.mkdocs.org/), [Zensical](https://zensical.org/), or any static site documentation with advanced Search Engine Optimization (SEO) features, interactive social elements, and structured data support. It automates the generation of essential meta tags, incorporates social sharing capabilities, and adds [JSON-LD](https://json-ld.org/) structured data to elevate user engagement and improve your documentation's visibility on the web.
49
49
 
50
50
  **Two modes available:**
51
51
 
@@ -64,12 +64,13 @@ This tool seamlessly integrates valuable features into your documentation site:
64
64
 
65
65
  - **Meta Tag Generation**: Automatically creates meta description and image tags using the first paragraph and image found on each page, crucial for SEO and social previews.
66
66
  - **Keyword Customization**: Allows you to define specific meta keywords directly within your Markdown front matter for targeted SEO.
67
- - **Social Media Optimization**: Generates [Open Graph](https://ogp.me/) and [Twitter Card](https://developer.x.com/en/docs/x-for-websites/cards/overview/summary-card-with-large-image) meta tags to ensure your content looks great when shared on social platforms.
67
+ - **Social Media Optimization**: Generates [Open Graph](https://ogp.me/) and [Twitter Card](https://docs.x.com/overview) meta tags to ensure your content looks great when shared on social platforms.
68
68
  - **Simple Sharing**: Inserts convenient share buttons for Twitter and LinkedIn at the end of your content, encouraging readers to share.
69
69
  - **Git Insights**: Gathers and displays [Git](https://git-scm.com/) commit information, including update dates and authors, directly within the page footer for transparency.
70
70
  - **JSON-LD Support**: Adds structured data in JSON-LD format, helping search engines understand your content better and potentially enabling rich results.
71
71
  - **FAQ Parsing**: Automatically parses FAQ sections (if present) and includes them in the structured data for enhanced search visibility.
72
72
  - **Copy for LLM**: Adds a button to copy page content in Markdown format, optimized for sharing with AI assistants.
73
+ - **LLMs.txt Generation**: Generates an `llms.txt` index after builds for LLM-friendly site discovery.
73
74
  - **Customizable Styling**: Includes optional inline CSS to maintain consistent styling across your documentation, aligning with themes like [MkDocs Material](https://squidfunk.github.io/mkdocs-material/).
74
75
 
75
76
  ## 🛠️ Installation
@@ -177,6 +178,7 @@ Both modes support the same configuration options:
177
178
  | `add_json_ld` | bool | `False` | Add JSON-LD structured data |
178
179
  | `add_css` | bool | `True` | Include inline CSS styles |
179
180
  | `add_copy_llm` | bool | `True` | Add "Copy for LLM" button |
181
+ | `add_llms_txt` | bool | `True` | Generate an `llms.txt` index |
180
182
 
181
183
  ## 🧩 How It Works
182
184
 
@@ -244,7 +246,7 @@ Please see our [Contributing Guide](https://docs.ultralytics.com/help/contributi
244
246
 
245
247
  Ultralytics provides two licensing options:
246
248
 
247
- - **AGPL-3.0 License**: Ideal for students, researchers, and enthusiasts, this [OSI-approved](https://opensource.org/license/agpl-v3) license promotes open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/mkdocs/blob/main/LICENSE) file for details.
249
+ - **AGPL-3.0 License**: Ideal for students, researchers, and enthusiasts, this [OSI-approved](https://opensource.org/license/agpl-3.0) license promotes open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/mkdocs/blob/main/LICENSE) file for details.
248
250
  - **Enterprise License**: Designed for commercial applications, this license allows seamless integration of Ultralytics software into commercial products, bypassing AGPL-3.0 requirements. Visit [Ultralytics Licensing](https://www.ultralytics.com/license) for details.
249
251
 
250
252
  ## ✉️ Connect with Us
@@ -259,7 +261,7 @@ Encountered a bug or have an idea? Visit [GitHub Issues](https://github.com/ultr
259
261
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
260
262
  <a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="3%" alt="Ultralytics Twitter"></a>
261
263
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
262
- <a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
264
+ <a href="https://www.youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
263
265
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
264
266
  <a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="3%" alt="Ultralytics TikTok"></a>
265
267
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
@@ -2,7 +2,7 @@
2
2
 
3
3
  # 🚀 MkDocs Ultralytics Plugin
4
4
 
5
- Welcome to the MkDocs Ultralytics Plugin! 📄 This powerful tool enhances your [MkDocs](https://www.mkdocs.org/), [Zensical](https://zensical.com/), or any static site documentation with advanced Search Engine Optimization (SEO) features, interactive social elements, and structured data support. It automates the generation of essential meta tags, incorporates social sharing capabilities, and adds [JSON-LD](https://json-ld.org/) structured data to elevate user engagement and improve your documentation's visibility on the web.
5
+ Welcome to the MkDocs Ultralytics Plugin! 📄 This powerful tool enhances your [MkDocs](https://www.mkdocs.org/), [Zensical](https://zensical.org/), or any static site documentation with advanced Search Engine Optimization (SEO) features, interactive social elements, and structured data support. It automates the generation of essential meta tags, incorporates social sharing capabilities, and adds [JSON-LD](https://json-ld.org/) structured data to elevate user engagement and improve your documentation's visibility on the web.
6
6
 
7
7
  **Two modes available:**
8
8
 
@@ -21,12 +21,13 @@ This tool seamlessly integrates valuable features into your documentation site:
21
21
 
22
22
  - **Meta Tag Generation**: Automatically creates meta description and image tags using the first paragraph and image found on each page, crucial for SEO and social previews.
23
23
  - **Keyword Customization**: Allows you to define specific meta keywords directly within your Markdown front matter for targeted SEO.
24
- - **Social Media Optimization**: Generates [Open Graph](https://ogp.me/) and [Twitter Card](https://developer.x.com/en/docs/x-for-websites/cards/overview/summary-card-with-large-image) meta tags to ensure your content looks great when shared on social platforms.
24
+ - **Social Media Optimization**: Generates [Open Graph](https://ogp.me/) and [Twitter Card](https://docs.x.com/overview) meta tags to ensure your content looks great when shared on social platforms.
25
25
  - **Simple Sharing**: Inserts convenient share buttons for Twitter and LinkedIn at the end of your content, encouraging readers to share.
26
26
  - **Git Insights**: Gathers and displays [Git](https://git-scm.com/) commit information, including update dates and authors, directly within the page footer for transparency.
27
27
  - **JSON-LD Support**: Adds structured data in JSON-LD format, helping search engines understand your content better and potentially enabling rich results.
28
28
  - **FAQ Parsing**: Automatically parses FAQ sections (if present) and includes them in the structured data for enhanced search visibility.
29
29
  - **Copy for LLM**: Adds a button to copy page content in Markdown format, optimized for sharing with AI assistants.
30
+ - **LLMs.txt Generation**: Generates an `llms.txt` index after builds for LLM-friendly site discovery.
30
31
  - **Customizable Styling**: Includes optional inline CSS to maintain consistent styling across your documentation, aligning with themes like [MkDocs Material](https://squidfunk.github.io/mkdocs-material/).
31
32
 
32
33
  ## 🛠️ Installation
@@ -134,6 +135,7 @@ Both modes support the same configuration options:
134
135
  | `add_json_ld` | bool | `False` | Add JSON-LD structured data |
135
136
  | `add_css` | bool | `True` | Include inline CSS styles |
136
137
  | `add_copy_llm` | bool | `True` | Add "Copy for LLM" button |
138
+ | `add_llms_txt` | bool | `True` | Generate an `llms.txt` index |
137
139
 
138
140
  ## 🧩 How It Works
139
141
 
@@ -201,7 +203,7 @@ Please see our [Contributing Guide](https://docs.ultralytics.com/help/contributi
201
203
 
202
204
  Ultralytics provides two licensing options:
203
205
 
204
- - **AGPL-3.0 License**: Ideal for students, researchers, and enthusiasts, this [OSI-approved](https://opensource.org/license/agpl-v3) license promotes open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/mkdocs/blob/main/LICENSE) file for details.
206
+ - **AGPL-3.0 License**: Ideal for students, researchers, and enthusiasts, this [OSI-approved](https://opensource.org/license/agpl-3.0) license promotes open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/mkdocs/blob/main/LICENSE) file for details.
205
207
  - **Enterprise License**: Designed for commercial applications, this license allows seamless integration of Ultralytics software into commercial products, bypassing AGPL-3.0 requirements. Visit [Ultralytics Licensing](https://www.ultralytics.com/license) for details.
206
208
 
207
209
  ## ✉️ Connect with Us
@@ -216,7 +218,7 @@ Encountered a bug or have an idea? Visit [GitHub Issues](https://github.com/ultr
216
218
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
217
219
  <a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="3%" alt="Ultralytics Twitter"></a>
218
220
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
219
- <a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
221
+ <a href="https://www.youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
220
222
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
221
223
  <a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="3%" alt="Ultralytics TikTok"></a>
222
224
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -45,7 +45,7 @@ Dynamic: license-file
45
45
 
46
46
  # 🚀 MkDocs Ultralytics Plugin
47
47
 
48
- Welcome to the MkDocs Ultralytics Plugin! 📄 This powerful tool enhances your [MkDocs](https://www.mkdocs.org/), [Zensical](https://zensical.com/), or any static site documentation with advanced Search Engine Optimization (SEO) features, interactive social elements, and structured data support. It automates the generation of essential meta tags, incorporates social sharing capabilities, and adds [JSON-LD](https://json-ld.org/) structured data to elevate user engagement and improve your documentation's visibility on the web.
48
+ Welcome to the MkDocs Ultralytics Plugin! 📄 This powerful tool enhances your [MkDocs](https://www.mkdocs.org/), [Zensical](https://zensical.org/), or any static site documentation with advanced Search Engine Optimization (SEO) features, interactive social elements, and structured data support. It automates the generation of essential meta tags, incorporates social sharing capabilities, and adds [JSON-LD](https://json-ld.org/) structured data to elevate user engagement and improve your documentation's visibility on the web.
49
49
 
50
50
  **Two modes available:**
51
51
 
@@ -64,12 +64,13 @@ This tool seamlessly integrates valuable features into your documentation site:
64
64
 
65
65
  - **Meta Tag Generation**: Automatically creates meta description and image tags using the first paragraph and image found on each page, crucial for SEO and social previews.
66
66
  - **Keyword Customization**: Allows you to define specific meta keywords directly within your Markdown front matter for targeted SEO.
67
- - **Social Media Optimization**: Generates [Open Graph](https://ogp.me/) and [Twitter Card](https://developer.x.com/en/docs/x-for-websites/cards/overview/summary-card-with-large-image) meta tags to ensure your content looks great when shared on social platforms.
67
+ - **Social Media Optimization**: Generates [Open Graph](https://ogp.me/) and [Twitter Card](https://docs.x.com/overview) meta tags to ensure your content looks great when shared on social platforms.
68
68
  - **Simple Sharing**: Inserts convenient share buttons for Twitter and LinkedIn at the end of your content, encouraging readers to share.
69
69
  - **Git Insights**: Gathers and displays [Git](https://git-scm.com/) commit information, including update dates and authors, directly within the page footer for transparency.
70
70
  - **JSON-LD Support**: Adds structured data in JSON-LD format, helping search engines understand your content better and potentially enabling rich results.
71
71
  - **FAQ Parsing**: Automatically parses FAQ sections (if present) and includes them in the structured data for enhanced search visibility.
72
72
  - **Copy for LLM**: Adds a button to copy page content in Markdown format, optimized for sharing with AI assistants.
73
+ - **LLMs.txt Generation**: Generates an `llms.txt` index after builds for LLM-friendly site discovery.
73
74
  - **Customizable Styling**: Includes optional inline CSS to maintain consistent styling across your documentation, aligning with themes like [MkDocs Material](https://squidfunk.github.io/mkdocs-material/).
74
75
 
75
76
  ## 🛠️ Installation
@@ -177,6 +178,7 @@ Both modes support the same configuration options:
177
178
  | `add_json_ld` | bool | `False` | Add JSON-LD structured data |
178
179
  | `add_css` | bool | `True` | Include inline CSS styles |
179
180
  | `add_copy_llm` | bool | `True` | Add "Copy for LLM" button |
181
+ | `add_llms_txt` | bool | `True` | Generate an `llms.txt` index |
180
182
 
181
183
  ## 🧩 How It Works
182
184
 
@@ -244,7 +246,7 @@ Please see our [Contributing Guide](https://docs.ultralytics.com/help/contributi
244
246
 
245
247
  Ultralytics provides two licensing options:
246
248
 
247
- - **AGPL-3.0 License**: Ideal for students, researchers, and enthusiasts, this [OSI-approved](https://opensource.org/license/agpl-v3) license promotes open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/mkdocs/blob/main/LICENSE) file for details.
249
+ - **AGPL-3.0 License**: Ideal for students, researchers, and enthusiasts, this [OSI-approved](https://opensource.org/license/agpl-3.0) license promotes open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/mkdocs/blob/main/LICENSE) file for details.
248
250
  - **Enterprise License**: Designed for commercial applications, this license allows seamless integration of Ultralytics software into commercial products, bypassing AGPL-3.0 requirements. Visit [Ultralytics Licensing](https://www.ultralytics.com/license) for details.
249
251
 
250
252
  ## ✉️ Connect with Us
@@ -259,7 +261,7 @@ Encountered a bug or have an idea? Visit [GitHub Issues](https://github.com/ultr
259
261
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
260
262
  <a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="3%" alt="Ultralytics Twitter"></a>
261
263
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
262
- <a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
264
+ <a href="https://www.youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
263
265
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
264
266
  <a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="3%" alt="Ultralytics TikTok"></a>
265
267
  <img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%" alt="space">
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- __version__ = "0.2.3"
3
+ __version__ = "0.2.5"
4
4
 
5
5
  from .main import MetaPlugin
6
6
  from .postprocess import postprocess_site
@@ -1,4 +1,5 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+ """MkDocs plugin entrypoint for Ultralytics documentation metadata."""
2
3
 
3
4
  from __future__ import annotations
4
5
 
@@ -9,6 +10,7 @@ from mkdocs.plugins import BasePlugin
9
10
 
10
11
  import plugin.processor as processor
11
12
  from plugin.processor import process_html
13
+ from plugin.utils import resolve_all_authors
12
14
 
13
15
 
14
16
  class MetaPlugin(BasePlugin):
@@ -27,9 +29,11 @@ class MetaPlugin(BasePlugin):
27
29
  ("add_json_ld", config_options.Type(bool, default=False)),
28
30
  ("add_css", config_options.Type(bool, default=True)),
29
31
  ("add_copy_llm", config_options.Type(bool, default=True)),
32
+ ("add_llms_txt", config_options.Type(bool, default=True)),
30
33
  )
31
34
 
32
35
  def __init__(self):
36
+ """Initialize cached repository metadata for page processing."""
33
37
  super().__init__()
34
38
  self.git_repo_url = None
35
39
  self.git_data = None
@@ -43,6 +47,12 @@ class MetaPlugin(BasePlugin):
43
47
  docs_dir = Path(config["docs_dir"])
44
48
  md_files = [str(p) for p in docs_dir.rglob("*.md")] if docs_dir.exists() else []
45
49
  self.git_repo_url, self.git_data = processor.build_git_map(md_files)
50
+ self.git_data = resolve_all_authors(
51
+ self.git_data,
52
+ default_author=self.config.get("default_author"),
53
+ repo_url=self.git_repo_url,
54
+ verbose=self.config.get("verbose", True),
55
+ )
46
56
  return config
47
57
 
48
58
  def on_post_page(self, output: str, page, config) -> str:
@@ -69,7 +79,6 @@ class MetaPlugin(BasePlugin):
69
79
  git_data=self.git_data,
70
80
  repo_url=self.git_repo_url,
71
81
  default_image=self.config["default_image"],
72
- default_author=self.config["default_author"],
73
82
  keywords=keywords,
74
83
  add_desc=self.config["add_desc"],
75
84
  add_image=self.config["add_image"],
@@ -84,3 +93,18 @@ class MetaPlugin(BasePlugin):
84
93
  if self.config["verbose"]:
85
94
  print(f"ERROR - mkdocs-ultralytics-plugin: Failed to process {page.file.src_path}: {e}")
86
95
  return output # Return original output on error
96
+
97
+ def on_post_build(self, config):
98
+ """Generate llms.txt after build completes. Added for mkdocs build compatibility. Not needed for zensical build."""
99
+ if not self.config.get("enabled", True) or not self.config.get("add_llms_txt", True):
100
+ return
101
+ from plugin.postprocess import generate_llms_txt
102
+
103
+ generate_llms_txt(
104
+ site_dir=Path(config["site_dir"]),
105
+ docs_dir=Path(config["docs_dir"]),
106
+ site_url=config.get("site_url", ""),
107
+ site_name=config.get("site_name"),
108
+ site_description=config.get("site_description"),
109
+ nav=config.get("nav"),
110
+ )
@@ -16,6 +16,7 @@ except ImportError:
16
16
 
17
17
  import plugin.processor as processor
18
18
  from plugin.processor import process_html
19
+ from plugin.utils import resolve_all_authors
19
20
 
20
21
  # Shared worker state for process pools (avoids re-pickling large read-only data per task)
21
22
  _WORKER_STATE: dict[str, Any] | None = None
@@ -37,7 +38,6 @@ def _process_file(html_file: Path) -> bool:
37
38
  _WORKER_STATE["repo_url"],
38
39
  site_url=_WORKER_STATE["site_url"],
39
40
  default_image=_WORKER_STATE["default_image"],
40
- default_author=_WORKER_STATE["default_author"],
41
41
  add_desc=_WORKER_STATE["add_desc"],
42
42
  add_image=_WORKER_STATE["add_image"],
43
43
  add_keywords=_WORKER_STATE["add_keywords"],
@@ -59,7 +59,6 @@ def process_html_file(
59
59
  repo_url: str | None,
60
60
  site_url: str = "",
61
61
  default_image: str | None = None,
62
- default_author: str | None = None,
63
62
  add_desc: bool = True,
64
63
  add_image: bool = True,
65
64
  add_keywords: bool = True,
@@ -114,7 +113,6 @@ def process_html_file(
114
113
  git_data=git_data,
115
114
  repo_url=repo_url,
116
115
  default_image=default_image,
117
- default_author=default_author,
118
116
  keywords=keywords,
119
117
  add_desc=add_desc,
120
118
  add_image=add_image,
@@ -136,6 +134,119 @@ def process_html_file(
136
134
  return False
137
135
 
138
136
 
137
+ def generate_llms_txt(
138
+ site_dir: Path,
139
+ docs_dir: Path,
140
+ site_url: str,
141
+ site_name: str | None = None,
142
+ site_description: str | None = None,
143
+ nav: list | None = None,
144
+ ) -> None:
145
+ """Generate llms.txt file for LLM consumption."""
146
+ import yaml
147
+
148
+ # Fallback to reading mkdocs.yml if config values not provided (standalone postprocess mode)
149
+ if site_name is None or nav is None:
150
+
151
+ class _Loader(yaml.SafeLoader):
152
+ pass
153
+
154
+ _Loader.add_multi_constructor("", lambda loader, suffix, node: None)
155
+
156
+ mkdocs_yml = site_dir.parent / "mkdocs.yml"
157
+ if mkdocs_yml.exists():
158
+ config = yaml.load(mkdocs_yml.read_text(), Loader=_Loader) or {}
159
+ site_name = site_name or config.get("site_name", "Documentation")
160
+ site_description = site_description or config.get("site_description", "")
161
+ nav = nav or config.get("nav")
162
+ site_name = site_name or "Documentation"
163
+ site_description = site_description or ""
164
+
165
+ lines = [f"# {site_name}", f"> {site_description}"]
166
+ seen_urls: set[str] = set()
167
+ site_url = site_url.rstrip("/")
168
+
169
+ def get_description(md_path: Path) -> str:
170
+ """Extract description from markdown frontmatter."""
171
+ try:
172
+ content = md_path.read_text()
173
+ if content.startswith("---"):
174
+ end = content.find("\n---\n", 3)
175
+ if end != -1:
176
+ fm = yaml.safe_load(content[4:end]) or {}
177
+ return fm.get("description", "")
178
+ except Exception:
179
+ pass
180
+ return ""
181
+
182
+ def md_to_url(md_path: str) -> str:
183
+ """Convert markdown path to HTML URL."""
184
+ url = md_path.replace(".md", "/").replace("/index/", "/")
185
+ return f"{site_url}/{url}" if url != "index/" else f"{site_url}/"
186
+
187
+ if nav:
188
+
189
+ def process_items(items, indent=0):
190
+ """Recursively process nav items with indentation (Vercel-style)."""
191
+ prefix = " " * indent + "- "
192
+ for item in items:
193
+ if isinstance(item, str):
194
+ md = docs_dir / item
195
+ if md.exists():
196
+ url = md_to_url(item)
197
+ if url in seen_urls:
198
+ continue
199
+ seen_urls.add(url)
200
+ desc = get_description(md)
201
+ # Use parent dir name for index.md, else filename
202
+ title = md.parent.name if md.stem == "index" else md.stem
203
+ title = title.replace("-", " ").replace("_", " ").title()
204
+ desc_part = f": {desc}" if desc else ""
205
+ lines.append(f"{prefix}[{title}]({url}){desc_part}")
206
+ elif isinstance(item, dict):
207
+ for k, v in item.items():
208
+ if isinstance(v, str):
209
+ md = docs_dir / v
210
+ if md.exists():
211
+ url = md_to_url(v)
212
+ if url in seen_urls:
213
+ continue
214
+ seen_urls.add(url)
215
+ desc = get_description(md)
216
+ desc_part = f": {desc}" if desc else ""
217
+ lines.append(f"{prefix}[{k}]({url}){desc_part}")
218
+ elif isinstance(v, list):
219
+ # Nested section - plain text header, then recurse
220
+ lines.append(f"{prefix}{k}")
221
+ process_items(v, indent + 1)
222
+
223
+ # Top-level nav items become ## sections
224
+ for item in nav:
225
+ if isinstance(item, str):
226
+ process_items([item], indent=0)
227
+ elif isinstance(item, dict):
228
+ for k, v in item.items():
229
+ if isinstance(v, list):
230
+ lines.extend(["", f"## {k}"])
231
+ process_items(v, indent=0)
232
+ else:
233
+ process_items([{k: v}], indent=0)
234
+ else:
235
+ for md in sorted(docs_dir.rglob("*.md")):
236
+ rel = md.relative_to(docs_dir).as_posix()
237
+ url = md_to_url(rel)
238
+ if url in seen_urls:
239
+ continue
240
+ seen_urls.add(url)
241
+ desc = get_description(md)
242
+ title = md.stem.replace("-", " ").replace("_", " ").title()
243
+ desc_part = f": {desc}" if desc else ""
244
+ lines.append(f"- [{title}]({url}){desc_part}")
245
+
246
+ (site_dir / "llms.txt").write_text("\n".join(lines))
247
+ print("Generated llms.txt")
248
+
249
+
139
250
  def postprocess_site(
140
251
  site_dir: str | Path = "site",
141
252
  docs_dir: str | Path = "docs",
@@ -150,6 +261,7 @@ def postprocess_site(
150
261
  add_json_ld: bool = False,
151
262
  add_css: bool = True,
152
263
  add_copy_llm: bool = True,
264
+ add_llms_txt: bool = True,
153
265
  verbose: bool = True,
154
266
  use_processes: bool = True,
155
267
  workers: int | None = None,
@@ -184,6 +296,9 @@ def postprocess_site(
184
296
  git_data = None
185
297
  if (add_authors or add_json_ld) and md_index:
186
298
  repo_url, git_data = processor.build_git_map(list(md_index.values()))
299
+ # Resolve all authors ONCE in main process before spawning workers
300
+ # This prevents race conditions when workers try to write to the cache file
301
+ git_data = resolve_all_authors(git_data, default_author=default_author, repo_url=repo_url, verbose=verbose)
187
302
 
188
303
  progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
189
304
  # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
@@ -196,7 +311,6 @@ def postprocess_site(
196
311
  repo_url=repo_url,
197
312
  site_url=site_url,
198
313
  default_image=default_image,
199
- default_author=default_author,
200
314
  add_desc=add_desc,
201
315
  add_image=add_image,
202
316
  add_keywords=add_keywords,
@@ -250,6 +364,9 @@ def postprocess_site(
250
364
 
251
365
  print(f"✅ Postprocessing complete: {processed}/{len(html_files)} files processed")
252
366
 
367
+ if add_llms_txt:
368
+ generate_llms_txt(site_dir, docs_dir, site_url)
369
+
253
370
 
254
371
  if __name__ == "__main__":
255
372
  postprocess_site()
@@ -13,11 +13,7 @@ from urllib.parse import quote
13
13
 
14
14
  from bs4 import BeautifulSoup
15
15
 
16
- from plugin.utils import (
17
- calculate_time_difference,
18
- get_github_usernames_from_file,
19
- get_youtube_video_ids,
20
- )
16
+ from plugin.utils import calculate_time_difference, get_youtube_video_ids
21
17
 
22
18
  today = datetime.now()
23
19
  DEFAULT_CREATION_DATE = (today - timedelta(days=365)).strftime("%Y-%m-%d %H:%M:%S +0000")
@@ -30,11 +26,9 @@ CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path
30
26
  def get_git_info(
31
27
  file_path: str,
32
28
  add_authors: bool = True,
33
- default_author: str | None = None,
34
29
  git_data: dict[str, dict[str, Any]] | None = None,
35
- repo_url: str | None = None,
36
30
  ) -> dict[str, Any]:
37
- """Retrieve git information (dates + optional authors) from precomputed git data."""
31
+ """Retrieve git information (dates + pre-resolved authors) from precomputed git data."""
38
32
  file_path = str(Path(file_path).resolve())
39
33
  git_info = {
40
34
  "creation_date": DEFAULT_CREATION_DATE,
@@ -45,29 +39,12 @@ def get_git_info(
45
39
  return git_info
46
40
 
47
41
  cached = git_data[file_path]
48
- git_info.update(
49
- {
50
- "creation_date": cached.get("creation_date", DEFAULT_CREATION_DATE),
51
- "last_modified_date": cached.get("last_modified_date", DEFAULT_MODIFIED_DATE),
52
- }
53
- )
54
-
55
- if add_authors and cached.get("emails"):
56
- git_info["authors"] = sorted(
57
- [
58
- (
59
- author,
60
- info["url"],
61
- info["changes"],
62
- info["avatar"],
63
- )
64
- for author, info in get_github_usernames_from_file(
65
- file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
66
- ).items()
67
- ],
68
- key=lambda x: x[2],
69
- reverse=True,
70
- )
42
+ git_info["creation_date"] = cached.get("creation_date", DEFAULT_CREATION_DATE)
43
+ git_info["last_modified_date"] = cached.get("last_modified_date", DEFAULT_MODIFIED_DATE)
44
+
45
+ # Authors are pre-resolved by resolve_all_authors() in the main process
46
+ if add_authors and "authors" in cached:
47
+ git_info["authors"] = cached["authors"]
71
48
 
72
49
  return git_info
73
50
 
@@ -159,7 +136,7 @@ def build_git_map(file_paths: list[str] | list[Path]) -> tuple[str | None, dict[
159
136
  str(repo_root),
160
137
  "log",
161
138
  "--name-only",
162
- "--pretty=format:%ad\t%ae",
139
+ "--pretty=format:%H\t%ad\t%ae",
163
140
  "--date=format:%Y-%m-%d %H:%M:%S %z",
164
141
  "--",
165
142
  *[str(p) for p in rel_paths],
@@ -170,17 +147,18 @@ def build_git_map(file_paths: list[str] | list[Path]) -> tuple[str | None, dict[
170
147
  except subprocess.CalledProcessError:
171
148
  return repo_url, git_data
172
149
 
150
+ current_commit = None
173
151
  current_date = None
174
152
  current_email = None
175
153
  for line in output:
176
154
  if not line.strip():
177
155
  continue
178
156
  parts = line.split("\t")
179
- if len(parts) == 2:
180
- current_date, current_email = parts
157
+ if len(parts) == 3:
158
+ current_commit, current_date, current_email = parts
181
159
  continue
182
160
 
183
- if current_date and current_email:
161
+ if current_commit and current_date and current_email:
184
162
  abs_path = (repo_root / line.strip()).resolve()
185
163
  key = str(abs_path)
186
164
  entry = git_data.setdefault(
@@ -189,11 +167,13 @@ def build_git_map(file_paths: list[str] | list[Path]) -> tuple[str | None, dict[
189
167
  "creation_date": current_date,
190
168
  "last_modified_date": current_date,
191
169
  "emails": {},
170
+ "commits": {},
192
171
  },
193
172
  )
194
173
  entry.setdefault("last_modified_date", current_date)
195
174
  entry["creation_date"] = current_date
196
175
  entry["emails"][current_email] = entry["emails"].get(current_email, 0) + 1
176
+ entry["commits"].setdefault(current_email, current_commit)
197
177
 
198
178
  return repo_url, git_data
199
179
 
@@ -309,7 +289,6 @@ def process_html(
309
289
  git_data: dict[str, dict[str, Any]] | None = None,
310
290
  repo_url: str | None = None,
311
291
  default_image: str | None = None,
312
- default_author: str | None = None,
313
292
  keywords: str | None = None,
314
293
  add_desc: bool = True,
315
294
  add_image: bool = True,
@@ -460,21 +439,22 @@ def process_html(
460
439
  let rawUrl = editBtn.href.replace('github.com', 'raw.githubusercontent.com');
461
440
  rawUrl = rawUrl.replace('/blob/', '/').replace('/tree/', '/');
462
441
 
463
- try {{
442
+ async function getContent() {{
464
443
  const response = await fetch(rawUrl);
465
444
  let markdown = await response.text();
466
-
467
445
  if (markdown.startsWith('---')) {{
468
446
  const frontMatterEnd = markdown.indexOf('\\n---\\n', 3);
469
- if (frontMatterEnd !== -1) {{
470
- markdown = markdown.substring(frontMatterEnd + 5).trim();
471
- }}
447
+ if (frontMatterEnd !== -1) markdown = markdown.substring(frontMatterEnd + 5).trim();
472
448
  }}
473
-
474
449
  const title = document.querySelector('h1')?.textContent || document.title;
475
- const content = `# ${{title}}\\n\\nSource: ${{window.location.href}}\\n\\n---\\n\\n${{markdown}}`;
450
+ return `# ${{title}}\\n\\nSource: ${{window.location.href}}\\n\\n---\\n\\n${{markdown}}`;
451
+ }}
476
452
 
477
- await navigator.clipboard.writeText(content);
453
+ try {{
454
+ const clipboardItem = new ClipboardItem({{
455
+ 'text/plain': getContent().then(text => new Blob([text], {{ type: 'text/plain' }}))
456
+ }});
457
+ await navigator.clipboard.write([clipboardItem]);
478
458
  button.innerHTML = checkIcon + ' Copied!';
479
459
  setTimeout(() => {{ button.innerHTML = originalHTML; }}, 2000);
480
460
  }} catch (err) {{
@@ -493,9 +473,7 @@ def process_html(
493
473
  needs_git = (add_authors or add_json_ld) and src_path
494
474
 
495
475
  if needs_git:
496
- git_info = get_git_info(
497
- src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
498
- )
476
+ git_info = get_git_info(src_path, add_authors=add_authors, git_data=git_data)
499
477
 
500
478
  # Only render git footer if we have real git history (not placeholder defaults)
501
479
  has_real_git_data = (
@@ -0,0 +1,262 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any
9
+ from urllib.parse import urlparse
10
+
11
+ import requests
12
+ import yaml
13
+
14
+ WARNING = "WARNING (mkdocs_ultralytics_plugin):"
15
+ TIMEOUT = 10 # seconds for network requests
16
+ DEFAULT_AVATAR_URL = "https://github.com/github.png"
17
+ _default_avatar_cache: str | None = None
18
+
19
+
20
+ def get_default_avatar() -> str:
21
+ """Get the default avatar URL, lazily fetching the resolved URL on first call."""
22
+ global _default_avatar_cache
23
+ if _default_avatar_cache is None:
24
+ try:
25
+ _default_avatar_cache = requests.head(DEFAULT_AVATAR_URL, allow_redirects=True, timeout=TIMEOUT).url
26
+ except Exception:
27
+ _default_avatar_cache = DEFAULT_AVATAR_URL # fallback to original URL
28
+ return _default_avatar_cache
29
+
30
+
31
+ def calculate_time_difference(date_string: str) -> tuple[str, str]:
32
+ """Calculate the time difference between a given date and the current date in a human-readable format.
33
+
34
+ Args:
35
+ date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
36
+
37
+ Returns:
38
+ difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
39
+ pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
40
+
41
+ Examples:
42
+ >>> calculate_time_difference("2023-01-01 00:00:00 +0000")
43
+ ("5 months", "January 01, 2023")
44
+ """
45
+ date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
46
+ pretty_date = date.strftime("%B %d, %Y")
47
+ now = datetime.now(date.tzinfo)
48
+ diff = now - date
49
+ days = diff.days
50
+
51
+ if days < 30:
52
+ difference = f"{days} day{'s' if days != 1 else ''}"
53
+ elif days < 365:
54
+ months = days // 30
55
+ difference = f"{months} month{'s' if months != 1 else ''}"
56
+ else:
57
+ years = days // 365
58
+ difference = f"{years} year{'s' if years != 1 else ''}"
59
+ return difference, pretty_date
60
+
61
+
62
+ def get_youtube_video_ids(soup) -> list[str]:
63
+ """Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
64
+
65
+ Args:
66
+ soup (BeautifulSoup): A BeautifulSoup object containing the HTML content.
67
+
68
+ Returns:
69
+ (list[str]): A list containing YouTube video IDs extracted from the HTML content.
70
+ """
71
+ youtube_ids = []
72
+ iframes = soup.find_all("iframe", src=True)
73
+ for iframe in iframes:
74
+ if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
75
+ youtube_ids.append(match[1])
76
+ return youtube_ids
77
+
78
+
79
+ def _get_cache_file() -> Path:
80
+ """Get the path to the GitHub author cache file."""
81
+ return Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
82
+
83
+
84
+ def load_author_cache() -> dict[str, dict[str, str | None]]:
85
+ """Load the GitHub author cache from disk."""
86
+ cache_file = _get_cache_file()
87
+ try:
88
+ return yaml.safe_load(cache_file.read_text()) or {} if cache_file.is_file() else {}
89
+ except Exception:
90
+ return {}
91
+
92
+
93
+ def save_author_cache(cache: dict[str, dict[str, str | None]]) -> None:
94
+ """Save the GitHub author cache to disk."""
95
+ try:
96
+ _get_cache_file().write_text(yaml.safe_dump(cache))
97
+ except Exception as e:
98
+ print(f"{WARNING} Failed to save author cache: {e}")
99
+
100
+
101
+ def _github_repo_path(repo_url: str | None) -> str | None:
102
+ """Return the owner/repo path for a GitHub repository URL."""
103
+ if not repo_url:
104
+ return None
105
+ parsed = urlparse(repo_url)
106
+ if parsed.hostname != "github.com":
107
+ return None
108
+ path = parsed.path.strip("/")
109
+ return path[:-4] if path.endswith(".git") else path or None
110
+
111
+
112
+ def resolve_github_user(
113
+ email: str,
114
+ cache: dict[str, dict[str, str | None]],
115
+ repo_url: str | None = None,
116
+ commit_sha: str | None = None,
117
+ verbose: bool = True,
118
+ ) -> dict[str, str | None]:
119
+ """Resolve a single email to GitHub username and avatar, updating cache in-place.
120
+
121
+ Args:
122
+ email (str): The email address to resolve.
123
+ cache (dict): The author cache dict (modified in-place if new entry added).
124
+ repo_url (str, optional): GitHub repository URL used for the commit API lookup, which is tried before user search.
125
+ commit_sha (str, optional): Commit SHA authored by the email.
126
+ verbose (bool): Whether to print API call info.
127
+
128
+ Returns:
129
+ (dict[str, str | None]): Dict with 'username' and 'avatar' keys (values may be None if not found).
130
+ """
131
+ if not email or not email.strip():
132
+ return {"username": None, "avatar": None}
133
+
134
+ # Return complete cached results immediately. Incomplete cached entries fall through and are re-resolved below.
135
+ if email in cache and cache[email].get("username") and cache[email].get("avatar"):
136
+ return cache[email]
137
+
138
+ # Parse username directly from GitHub noreply emails
139
+ if email.endswith("@users.noreply.github.com"):
140
+ username = email.split("+")[-1].split("@")[0]
141
+ try:
142
+ avatar = requests.head(f"https://github.com/{username}.png", allow_redirects=True, timeout=TIMEOUT).url
143
+ except Exception:
144
+ avatar = None
145
+ cache[email] = {"username": username, "avatar": avatar}
146
+ return cache[email]
147
+
148
+ # Query the commit API when git history provides a commit for this email. This resolves authors whose commit email
149
+ # is linked to a GitHub account but hidden from user search.
150
+ if repo_path := _github_repo_path(repo_url):
151
+ if commit_sha:
152
+ try:
153
+ response = requests.get(
154
+ f"https://api.github.com/repos/{repo_path}/commits/{commit_sha}", timeout=TIMEOUT
155
+ )
156
+ if response.status_code == 200:
157
+ data = response.json()
158
+ author = data.get("author") or {}
159
+ if author.get("login") and author.get("avatar_url"):
160
+ cache[email] = {"username": author["login"], "avatar": author["avatar_url"]}
161
+ return cache[email]
162
+ except Exception:
163
+ pass
164
+
165
+ # Fall back to the GitHub user search API, matching on email
166
+ if verbose:
167
+ print(f"Running GitHub REST API for author {email}")
168
+ try:
169
+ response = requests.get(
170
+ f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc", timeout=TIMEOUT
171
+ )
172
+ if response.status_code == 200:
173
+ data = response.json()
174
+ if data.get("total_count", 0) > 0:
175
+ username = data["items"][0]["login"]
176
+ avatar = requests.head(data["items"][0]["avatar_url"], allow_redirects=True, timeout=TIMEOUT).url
177
+ cache[email] = {"username": username, "avatar": avatar}
178
+ return cache[email]
179
+ except Exception:
180
+ pass
181
+
182
+ if verbose:
183
+ print(f"{WARNING} No username found for {email}")
184
+ cache[email] = {"username": None, "avatar": None}
185
+ return cache[email]
186
+
187
+
188
+ def resolve_all_authors(
189
+ git_data: dict[str, dict[str, Any]],
190
+ default_author: str | None = None,
191
+ repo_url: str | None = None,
192
+ verbose: bool = True,
193
+ ) -> dict[str, dict[str, Any]]:
194
+ """Pre-resolve all unique emails from git_data to GitHub usernames.
195
+
196
+ This should be called ONCE in the main process before spawning workers. It collects all unique emails, resolves
197
+ them, saves the cache, and returns git_data with 'authors' pre-populated for each file.
198
+
199
+ Args:
200
+ git_data (dict): The git metadata dict from build_git_map().
201
+ default_author (str, optional): Default author email used when an entry has no (or a blank) git author email.
202
+ repo_url (str, optional): Repository URL used for commit API lookups and as the fallback author link.
203
+ verbose (bool): Whether to print progress info.
204
+
205
+ Returns:
206
+ (dict): The same git_data object, updated in place with an 'authors' list added to each entry.
207
+ """
208
+ if not git_data:
209
+ return git_data
210
+
211
+ # Collect all unique emails across all files, with one representative commit SHA per email.
212
+ all_emails: set[str] = set()
213
+ commits: dict[str, str] = {}
214
+ for entry in git_data.values():
215
+ all_emails.update(entry.get("emails", {}).keys())
216
+ for email, commit in entry.get("commits", {}).items():
217
+ commits.setdefault(email, commit)
218
+ if default_author:
219
+ all_emails.add(default_author)
220
+ all_emails.discard("")
221
+
222
+ if not all_emails:
223
+ return git_data
224
+
225
+ # Load cache, resolve all emails, save cache (single disk write)
226
+ cache = load_author_cache()
227
+ cache_modified = False
228
+
229
+ for email in sorted(all_emails):
230
+ cached = cache.get(email, {})
231
+ if email not in cache or (commits.get(email) and not (cached.get("username") and cached.get("avatar"))):
232
+ resolve_github_user(email, cache, repo_url=repo_url, commit_sha=commits.get(email), verbose=verbose)
233
+ cache_modified = True
234
+
235
+ if cache_modified:
236
+ save_author_cache(cache)
237
+
238
+ # Build authors list for each file entry
239
+ github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
240
+
241
+ for file_path, entry in git_data.items():
242
+ emails = entry.get("emails", {})
243
+ if not emails and default_author:
244
+ emails = {default_author: 1}
245
+
246
+ authors = []
247
+ for email, changes in emails.items():
248
+ email = email.strip() if email else ""
249
+ if not email:
250
+ email = default_author or ""
251
+ if not email:
252
+ continue
253
+ info = cache.get(email, {"username": None, "avatar": None})
254
+ username = info.get("username")
255
+ avatar = info.get("avatar") or get_default_avatar()
256
+ user_url = f"https://github.com/{username}" if username else github_repo_url
257
+ authors.append((username or email, user_url, changes, avatar))
258
+
259
+ # Sort by number of changes (descending)
260
+ entry["authors"] = sorted(authors, key=lambda x: x[2], reverse=True)
261
+
262
+ return git_data
@@ -1,236 +0,0 @@
1
- # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
-
3
- from __future__ import annotations
4
-
5
- import re
6
- import threading
7
- from datetime import datetime
8
- from pathlib import Path
9
- from typing import Any
10
-
11
- import requests
12
- import yaml # YAML is used for its readability and consistency with MkDocs ecosystem
13
- from bs4 import BeautifulSoup
14
-
15
- WARNING = "WARNING (mkdocs_ultralytics_plugin):"
16
- DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
17
-
18
- # Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
19
- _AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
20
- _AUTHOR_CACHE_MTIME: float | None = None
21
- _CACHE_LOCK = threading.Lock()
22
-
23
-
24
- def calculate_time_difference(date_string: str) -> tuple[str, str]:
25
- """Calculate the time difference between a given date and the current date in a human-readable format.
26
-
27
- Args:
28
- date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
29
-
30
- Returns:
31
- difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
32
- pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
33
-
34
- Examples:
35
- >>> calculate_time_difference("2023-01-01 00:00:00 +0000")
36
- ("5 months", "January 01, 2023")
37
- """
38
- date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
39
- pretty_date = date.strftime("%B %d, %Y")
40
- now = datetime.now(date.tzinfo)
41
- diff = now - date
42
- days = diff.days
43
-
44
- if days < 30:
45
- difference = f"{days} day{'s' if days != 1 else ''}"
46
- elif days < 365:
47
- months = days // 30
48
- difference = f"{months} month{'s' if months != 1 else ''}"
49
- else:
50
- years = days // 365
51
- difference = f"{years} year{'s' if years != 1 else ''}"
52
- return difference, pretty_date
53
-
54
-
55
- def get_youtube_video_ids(soup: BeautifulSoup) -> list[str]:
56
- """Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
57
-
58
- Args:
59
- soup (BeautifulSoup): A BeautifulSoup object containing the HTML content from which YouTube video IDs need to be
60
- extracted.
61
-
62
- Returns:
63
- (List[str]): A list containing YouTube video IDs extracted from the HTML content.
64
-
65
- Examples:
66
- >>> from bs4 import BeautifulSoup
67
- >>> html_content = '''
68
- ... <html>
69
- ... <body>
70
- ... <iframe src="https://www.youtube.com/embed/example_id1"></iframe>
71
- ... <iframe src="https://www.youtube.com/embed/example_id2"></iframe>
72
- ... </body>
73
- ... </html>
74
- ... '''
75
- >>> soup = BeautifulSoup(html_content, 'html.parser')
76
- >>> video_ids = get_youtube_video_ids(soup)
77
- >>> print(video_ids)
78
- ['example_id1', 'example_id2']
79
- """
80
- youtube_ids = []
81
- iframes = soup.find_all("iframe", src=True)
82
- for iframe in iframes:
83
- if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
84
- youtube_ids.append(match[1])
85
- return youtube_ids
86
-
87
-
88
- def get_github_username_from_email(
89
- email: str, cache: dict, file_path: str = "", verbose: bool = True
90
- ) -> tuple[str | None, str | None]:
91
- """Retrieve the GitHub username and avatar URL associated with the given email address.
92
-
93
- Args:
94
- email (str): The email address to retrieve the GitHub username for.
95
- cache (Dict): A dictionary containing cached email-GitHub username mappings.
96
- file_path (str, optional): Name of the file the user authored.
97
- verbose (bool, optional): Whether to print verbose output.
98
-
99
- Returns:
100
- username (str | None): GitHub username if found, None otherwise.
101
- avatar (str | None): Avatar URL if found, None otherwise.
102
-
103
- Notes:
104
- If the email ends with "@users.noreply.github.com", the function will parse the username directly from the
105
- email address. Uses the GitHub REST API to query the username if it's not found in the local cache. Ensure
106
- you comply with GitHub's rate limits and authentication requirements when querying their API.
107
- """
108
- # First, check if the email exists in the local cache file
109
- with _CACHE_LOCK:
110
- if email in cache:
111
- return cache[email].get("username"), cache[email].get("avatar")
112
- if not email.strip():
113
- if verbose:
114
- print(f"{WARNING} No author found for {file_path}")
115
- return None, None
116
-
117
- # If the email ends with "@users.noreply.github.com", parse the username directly
118
- if email.endswith("@users.noreply.github.com"):
119
- username = email.split("+")[-1].split("@")[0]
120
- avatar = f"https://github.com/{username}.png"
121
- avatar_url = requests.head(avatar, allow_redirects=True).url
122
- with _CACHE_LOCK:
123
- cache[email] = {
124
- "username": username,
125
- "avatar": avatar_url,
126
- }
127
- return username, avatar
128
-
129
- # Fallback to GitHub REST API when not cached
130
- url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
131
- if verbose:
132
- print(f"Running GitHub REST API for author {email}")
133
- response = requests.get(url)
134
- if response.status_code == 200:
135
- data = response.json()
136
- if data["total_count"] > 0:
137
- username = data["items"][0]["login"]
138
- avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
139
- avatar_url = requests.head(avatar, allow_redirects=True).url
140
- with _CACHE_LOCK:
141
- cache[email] = {
142
- "username": username,
143
- "avatar": avatar_url,
144
- }
145
- return username, avatar
146
-
147
- if verbose:
148
- print(f"{WARNING} No username found for {email}")
149
- with _CACHE_LOCK:
150
- cache[email] = {"username": None, "avatar": None}
151
- return None, None
152
-
153
-
154
- def get_github_usernames_from_file(
155
- file_path: str,
156
- default_user: str | None = None,
157
- emails: dict[str, int] | None = None,
158
- repo_url: str | None = None,
159
- force_reload: bool = False,
160
- ) -> dict[str, dict[str, Any]]:
161
- """Fetch GitHub usernames associated with a file using provided Git email counts.
162
-
163
- Args:
164
- file_path (str): The path to the file for which GitHub usernames are to be retrieved.
165
- default_user (str, optional): Default GitHub user email to use if no authors found.
166
-
167
- Returns:
168
- (Dict[str, Dict[str, any]]): A dictionary where keys are GitHub usernames or emails (if username is not
169
- found) and values are dictionaries containing:
170
- - 'email' (str): The email address of the author.
171
- - 'url' (str): The GitHub profile URL of the author.
172
- - 'changes' (int): The number of changes (commits) made by the author.
173
- - 'avatar' (str): The URL of the author's GitHub avatar.
174
-
175
- Examples:
176
- >>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
177
- {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
178
- """
179
- if emails is None:
180
- emails = {}
181
- else:
182
- emails = dict(emails) # shallow copy to avoid mutating caller data
183
-
184
- # If no git info found but default_user provided, use default_user
185
- if not emails and default_user:
186
- emails[default_user] = 1
187
-
188
- # Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
189
- local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
190
- global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
191
- with _CACHE_LOCK:
192
- current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
193
- needs_reload = (
194
- force_reload
195
- or _AUTHOR_CACHE is None
196
- or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
197
- )
198
- if needs_reload:
199
- if local_cache_file.is_file():
200
- with local_cache_file.open("r") as f:
201
- _AUTHOR_CACHE = yaml.safe_load(f) or {}
202
- _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
203
- else:
204
- _AUTHOR_CACHE = {}
205
- _AUTHOR_CACHE_MTIME = None
206
- cache = _AUTHOR_CACHE
207
-
208
- github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
209
-
210
- info = {}
211
- cache_updated = False
212
- for email, changes in emails.items():
213
- if not email and default_user:
214
- email = default_user
215
- was_cached = email in cache
216
- prev_entry = cache.get(email)
217
- username, avatar = get_github_username_from_email(email, cache, file_path)
218
- # If we can't determine the user URL, revert to the GitHub file URL
219
- user_url = f"https://github.com/{username}" if username else github_repo_url
220
- info[username or email] = {
221
- "email": email,
222
- "url": user_url,
223
- "changes": changes,
224
- "avatar": avatar or DEFAULT_AVATAR,
225
- }
226
- cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
227
-
228
- # Save the local cache of GitHub usernames and avatar URLs if updated
229
- if cache_updated:
230
- with _CACHE_LOCK:
231
- _AUTHOR_CACHE = cache
232
- with local_cache_file.open("w") as f:
233
- yaml.safe_dump(cache, f)
234
- _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
235
-
236
- return info