mkdocs-ultralytics-plugin 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17) hide show
  1. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/PKG-INFO +1 -1
  2. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
  3. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/__init__.py +1 -1
  4. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/main.py +21 -0
  5. mkdocs_ultralytics_plugin-0.2.3/plugin/postprocess.py +255 -0
  6. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/processor.py +126 -28
  7. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/utils.py +71 -69
  8. mkdocs_ultralytics_plugin-0.2.1/plugin/postprocess.py +0 -157
  9. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/LICENSE +0 -0
  10. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/README.md +0 -0
  11. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
  12. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
  13. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
  14. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
  15. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
  16. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/pyproject.toml +0 -0
  17. {mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- __version__ = "0.2.1"
3
+ __version__ = "0.2.3"
4
4
 
5
5
  from .main import MetaPlugin
6
6
  from .postprocess import postprocess_site
@@ -2,9 +2,12 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from pathlib import Path
6
+
5
7
  from mkdocs.config import config_options
6
8
  from mkdocs.plugins import BasePlugin
7
9
 
10
+ import plugin.processor as processor
8
11
  from plugin.processor import process_html
9
12
 
10
13
 
@@ -26,6 +29,22 @@ class MetaPlugin(BasePlugin):
26
29
  ("add_copy_llm", config_options.Type(bool, default=True)),
27
30
  )
28
31
 
32
+ def __init__(self):
33
+ super().__init__()
34
+ self.git_repo_url = None
35
+ self.git_data = None
36
+
37
+ def on_config(self, config):
38
+ """Prepare git metadata once for all pages if authors/JSON-LD are enabled."""
39
+ if not self.config.get("enabled", True):
40
+ return config
41
+
42
+ if self.config.get("add_authors") or self.config.get("add_json_ld"):
43
+ docs_dir = Path(config["docs_dir"])
44
+ md_files = [str(p) for p in docs_dir.rglob("*.md")] if docs_dir.exists() else []
45
+ self.git_repo_url, self.git_data = processor.build_git_map(md_files)
46
+ return config
47
+
29
48
  def on_post_page(self, output: str, page, config) -> str:
30
49
  """Enhance HTML output by delegating to shared processor."""
31
50
  if not self.config["enabled"]:
@@ -47,6 +66,8 @@ class MetaPlugin(BasePlugin):
47
66
  page_url=page_url,
48
67
  title=title,
49
68
  src_path=page.file.abs_src_path,
69
+ git_data=self.git_data,
70
+ repo_url=self.git_repo_url,
50
71
  default_image=self.config["default_image"],
51
72
  default_author=self.config["default_author"],
52
73
  keywords=keywords,
@@ -0,0 +1,255 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+ """Postprocess MkDocs/Zensical site by adding metadata, git info, and social features."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import os
7
+ from collections.abc import Callable
8
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ try:
13
+ from ultralytics.utils import TQDM # progress bars
14
+ except ImportError:
15
+ TQDM = None
16
+
17
+ import plugin.processor as processor
18
+ from plugin.processor import process_html
19
+
20
+ # Shared worker state for process pools (avoids re-pickling large read-only data per task)
21
+ _WORKER_STATE: dict[str, Any] | None = None
22
+
23
+
24
+ def _set_worker_state(state: dict[str, Any]) -> None:
25
+ global _WORKER_STATE
26
+ _WORKER_STATE = state
27
+
28
+
29
+ def _process_file(html_file: Path) -> bool:
30
+ if _WORKER_STATE is None:
31
+ raise RuntimeError("Worker state not initialized")
32
+ return process_html_file(
33
+ html_file,
34
+ _WORKER_STATE["site_dir"],
35
+ _WORKER_STATE["md_index"],
36
+ _WORKER_STATE["git_data"],
37
+ _WORKER_STATE["repo_url"],
38
+ site_url=_WORKER_STATE["site_url"],
39
+ default_image=_WORKER_STATE["default_image"],
40
+ default_author=_WORKER_STATE["default_author"],
41
+ add_desc=_WORKER_STATE["add_desc"],
42
+ add_image=_WORKER_STATE["add_image"],
43
+ add_keywords=_WORKER_STATE["add_keywords"],
44
+ add_share_buttons=_WORKER_STATE["add_share_buttons"],
45
+ add_authors=_WORKER_STATE["add_authors"],
46
+ add_json_ld=_WORKER_STATE["add_json_ld"],
47
+ add_css=_WORKER_STATE["add_css"],
48
+ add_copy_llm=_WORKER_STATE["add_copy_llm"],
49
+ verbose=_WORKER_STATE["verbose"],
50
+ log=None,
51
+ )
52
+
53
+
54
+ def process_html_file(
55
+ html_path: Path,
56
+ site_dir: Path,
57
+ md_index: dict[str, str],
58
+ git_data: dict[str, dict[str, str | dict]] | None,
59
+ repo_url: str | None,
60
+ site_url: str = "",
61
+ default_image: str | None = None,
62
+ default_author: str | None = None,
63
+ add_desc: bool = True,
64
+ add_image: bool = True,
65
+ add_keywords: bool = True,
66
+ add_share_buttons: bool = True,
67
+ add_authors: bool = False,
68
+ add_json_ld: bool = False,
69
+ add_css: bool = True,
70
+ add_copy_llm: bool = True,
71
+ verbose: bool = False,
72
+ log: Callable[[str], None] | None = print,
73
+ ) -> bool:
74
+ """Process a single HTML file by delegating to shared processor.
75
+
76
+ Returns:
77
+ bool: True if file was successfully processed and written, False otherwise.
78
+ """
79
+ from bs4 import BeautifulSoup
80
+
81
+ try:
82
+ html = html_path.read_text(encoding="utf-8")
83
+ except (UnicodeDecodeError, FileNotFoundError) as e:
84
+ if verbose and log:
85
+ log(f"Error reading {html_path}: {e}")
86
+ return False
87
+
88
+ soup = BeautifulSoup(html, "html.parser")
89
+
90
+ # Get page URL - calculate relative path from site_dir
91
+ rel_path = html_path.relative_to(site_dir).as_posix()
92
+ page_url = f"{site_url.rstrip('/')}/{rel_path}".replace("/index.html", "/")
93
+
94
+ # Get title
95
+ title = soup.find("h1").text if soup.find("h1") else soup.title.string if soup.title else ""
96
+
97
+ # Extract keywords from existing meta tag if present
98
+ keywords = None
99
+ if meta_keywords := soup.find("meta", attrs={"name": "keywords"}):
100
+ keywords = meta_keywords.get("content")
101
+
102
+ # Find source markdown file from prebuilt index using relative path
103
+ html_rel = html_path.relative_to(site_dir).with_suffix("").as_posix()
104
+ if html_rel.endswith("/index"):
105
+ html_rel = html_rel[:-6] # Remove /index suffix
106
+ src_path = md_index.get(html_rel or "index") or md_index.get(f"{html_rel}/index")
107
+
108
+ # Process HTML
109
+ processed_html = process_html(
110
+ html=html,
111
+ page_url=page_url,
112
+ title=title,
113
+ src_path=src_path,
114
+ git_data=git_data,
115
+ repo_url=repo_url,
116
+ default_image=default_image,
117
+ default_author=default_author,
118
+ keywords=keywords,
119
+ add_desc=add_desc,
120
+ add_image=add_image,
121
+ add_keywords=add_keywords,
122
+ add_share_buttons=add_share_buttons,
123
+ add_authors=add_authors,
124
+ add_json_ld=add_json_ld,
125
+ add_css=add_css,
126
+ add_copy_llm=add_copy_llm,
127
+ )
128
+
129
+ # Write back
130
+ try:
131
+ html_path.write_text(processed_html, encoding="utf-8")
132
+ return True
133
+ except (OSError, PermissionError) as e:
134
+ if verbose and log:
135
+ log(f"Error writing {html_path}: {e}")
136
+ return False
137
+
138
+
139
+ def postprocess_site(
140
+ site_dir: str | Path = "site",
141
+ docs_dir: str | Path = "docs",
142
+ site_url: str = "",
143
+ default_image: str | None = None,
144
+ default_author: str | None = None,
145
+ add_desc: bool = True,
146
+ add_image: bool = True,
147
+ add_keywords: bool = True,
148
+ add_share_buttons: bool = True,
149
+ add_authors: bool = False,
150
+ add_json_ld: bool = False,
151
+ add_css: bool = True,
152
+ add_copy_llm: bool = True,
153
+ verbose: bool = True,
154
+ use_processes: bool = True,
155
+ workers: int | None = None,
156
+ ) -> None:
157
+ """Process all HTML files in the site directory."""
158
+ site_dir = Path(site_dir)
159
+ docs_dir = Path(docs_dir)
160
+
161
+ if not site_dir.exists():
162
+ print(f"Site directory not found: {site_dir}")
163
+ return
164
+
165
+ html_files = list(site_dir.rglob("*.html"))
166
+ if not html_files:
167
+ print(f"No HTML files found in {site_dir}")
168
+ return
169
+
170
+ worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
171
+
172
+ # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
173
+ md_index = (
174
+ {md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
175
+ if docs_dir.exists()
176
+ else {}
177
+ )
178
+
179
+ mode = "process" if use_processes else "thread"
180
+ print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
181
+
182
+ processed = 0
183
+ repo_url = None
184
+ git_data = None
185
+ if (add_authors or add_json_ld) and md_index:
186
+ repo_url, git_data = processor.build_git_map(list(md_index.values()))
187
+
188
+ progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
189
+ # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
190
+ log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
191
+
192
+ task_kwargs = dict(
193
+ site_dir=site_dir,
194
+ md_index=md_index,
195
+ git_data=git_data,
196
+ repo_url=repo_url,
197
+ site_url=site_url,
198
+ default_image=default_image,
199
+ default_author=default_author,
200
+ add_desc=add_desc,
201
+ add_image=add_image,
202
+ add_keywords=add_keywords,
203
+ add_share_buttons=add_share_buttons,
204
+ add_authors=add_authors,
205
+ add_json_ld=add_json_ld,
206
+ add_css=add_css,
207
+ add_copy_llm=add_copy_llm,
208
+ verbose=verbose,
209
+ )
210
+
211
+ if worker_count == 1:
212
+ for html_file in html_files:
213
+ success = process_html_file(html_file, **task_kwargs, log=log_fn)
214
+ processed += bool(success)
215
+ if progress:
216
+ progress.update(1)
217
+ else:
218
+ if use_processes:
219
+ state = {**task_kwargs}
220
+ executor_context = ProcessPoolExecutor(
221
+ max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
222
+ )
223
+
224
+ def submit_fn(ex, f):
225
+ return ex.submit(_process_file, f)
226
+ else:
227
+ executor_context = ThreadPoolExecutor(max_workers=worker_count)
228
+
229
+ def submit_fn(ex, f):
230
+ return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
231
+
232
+ with executor_context as executor:
233
+ future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
234
+
235
+ for future in as_completed(future_to_file):
236
+ html_file = future_to_file[future]
237
+ try:
238
+ success = future.result()
239
+ except Exception as e:
240
+ success = False
241
+ if verbose:
242
+ (log_fn or print)(f"Error processing {html_file}: {e}")
243
+ if success:
244
+ processed += 1
245
+ if progress:
246
+ progress.update(1)
247
+
248
+ if progress:
249
+ progress.close()
250
+
251
+ print(f"✅ Postprocessing complete: {processed}/{len(html_files)} files processed")
252
+
253
+
254
+ if __name__ == "__main__":
255
+ postprocess_site()
@@ -27,37 +27,47 @@ COPY_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d
27
27
  CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 16.17L4.83 12l-1.42 1.41L9 19L21 7l-1.41-1.41L9 16.17z"></path></svg>'
28
28
 
29
29
 
30
- def get_git_info(file_path: str, add_authors: bool = True, default_author: str | None = None) -> dict[str, Any]:
31
- """Retrieve git information including creation/modified dates and optional authors."""
30
+ def get_git_info(
31
+ file_path: str,
32
+ add_authors: bool = True,
33
+ default_author: str | None = None,
34
+ git_data: dict[str, dict[str, Any]] | None = None,
35
+ repo_url: str | None = None,
36
+ ) -> dict[str, Any]:
37
+ """Retrieve git information (dates + optional authors) from precomputed git data."""
32
38
  file_path = str(Path(file_path).resolve())
33
39
  git_info = {
34
40
  "creation_date": DEFAULT_CREATION_DATE,
35
41
  "last_modified_date": DEFAULT_MODIFIED_DATE,
36
42
  }
37
43
 
38
- try:
39
- subprocess.check_output(["git", "rev-parse", "--is-inside-work-tree"], stderr=subprocess.DEVNULL)
40
- creation_output = subprocess.check_output(
41
- ["git", "log", "--reverse", "--pretty=format:%ai", file_path]
42
- ).decode()
43
- creation_date = creation_output.split("\n")[0] if creation_output else ""
44
- last_modified_date = subprocess.check_output(["git", "log", "-1", "--pretty=format:%ai", file_path]).decode()
45
- git_info.update(
46
- {
47
- "creation_date": creation_date or DEFAULT_CREATION_DATE,
48
- "last_modified_date": last_modified_date or DEFAULT_MODIFIED_DATE,
49
- }
50
- )
44
+ if not git_data or file_path not in git_data:
45
+ return git_info
51
46
 
52
- if add_authors:
53
- authors_info = get_github_usernames_from_file(file_path, default_user=default_author)
54
- git_info["authors"] = sorted(
55
- [(author, info["url"], info["changes"], info["avatar"]) for author, info in authors_info.items()],
56
- key=lambda x: x[2],
57
- reverse=True,
58
- )
59
- except (subprocess.CalledProcessError, FileNotFoundError):
60
- pass
47
+ cached = git_data[file_path]
48
+ git_info.update(
49
+ {
50
+ "creation_date": cached.get("creation_date", DEFAULT_CREATION_DATE),
51
+ "last_modified_date": cached.get("last_modified_date", DEFAULT_MODIFIED_DATE),
52
+ }
53
+ )
54
+
55
+ if add_authors and cached.get("emails"):
56
+ git_info["authors"] = sorted(
57
+ [
58
+ (
59
+ author,
60
+ info["url"],
61
+ info["changes"],
62
+ info["avatar"],
63
+ )
64
+ for author, info in get_github_usernames_from_file(
65
+ file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
66
+ ).items()
67
+ ],
68
+ key=lambda x: x[2],
69
+ reverse=True,
70
+ )
61
71
 
62
72
  return git_info
63
73
 
@@ -104,6 +114,90 @@ def insert_content(soup: BeautifulSoup, content_to_insert) -> None:
104
114
  md_typeset.append(content_to_insert)
105
115
 
106
116
 
117
+ def build_git_map(file_paths: list[str] | list[Path]) -> tuple[str | None, dict[str, dict[str, Any]]]:
118
+ """Build git metadata for provided files using a single git log pass."""
119
+ git_data: dict[str, dict[str, Any]] = {}
120
+ repo_url: str | None = None
121
+
122
+ if not file_paths:
123
+ return repo_url, git_data
124
+
125
+ try:
126
+ repo_root = Path(
127
+ subprocess.check_output(["git", "rev-parse", "--show-toplevel"], stderr=subprocess.DEVNULL).decode().strip()
128
+ )
129
+ except subprocess.CalledProcessError:
130
+ return repo_url, git_data
131
+
132
+ try:
133
+ github_repo_url = subprocess.check_output(
134
+ ["git", "-C", str(repo_root), "config", "--get", "remote.origin.url"], stderr=subprocess.DEVNULL
135
+ ).decode("utf-8")
136
+ github_repo_url = github_repo_url.strip()
137
+ if github_repo_url.endswith(".git"):
138
+ github_repo_url = github_repo_url[:-4]
139
+ if github_repo_url.startswith("git@"):
140
+ github_repo_url = "https://" + github_repo_url[4:].replace(":", "/")
141
+ repo_url = github_repo_url or None
142
+ except subprocess.CalledProcessError:
143
+ repo_url = None
144
+
145
+ rel_paths = []
146
+ for fp in file_paths:
147
+ path = Path(fp)
148
+ if path.exists():
149
+ try:
150
+ rel_paths.append(path.resolve().relative_to(repo_root))
151
+ except ValueError:
152
+ continue
153
+ if not rel_paths:
154
+ return repo_url, git_data
155
+
156
+ cmd = [
157
+ "git",
158
+ "-C",
159
+ str(repo_root),
160
+ "log",
161
+ "--name-only",
162
+ "--pretty=format:%ad\t%ae",
163
+ "--date=format:%Y-%m-%d %H:%M:%S %z",
164
+ "--",
165
+ *[str(p) for p in rel_paths],
166
+ ]
167
+
168
+ try:
169
+ output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode().splitlines()
170
+ except subprocess.CalledProcessError:
171
+ return repo_url, git_data
172
+
173
+ current_date = None
174
+ current_email = None
175
+ for line in output:
176
+ if not line.strip():
177
+ continue
178
+ parts = line.split("\t")
179
+ if len(parts) == 2:
180
+ current_date, current_email = parts
181
+ continue
182
+
183
+ if current_date and current_email:
184
+ abs_path = (repo_root / line.strip()).resolve()
185
+ key = str(abs_path)
186
+ entry = git_data.setdefault(
187
+ key,
188
+ {
189
+ "creation_date": current_date,
190
+ "last_modified_date": current_date,
191
+ "emails": {},
192
+ },
193
+ )
194
+ entry.setdefault("last_modified_date", current_date)
195
+ entry["creation_date"] = current_date
196
+ entry["emails"][current_email] = entry["emails"].get(current_email, 0) + 1
197
+
198
+ return repo_url, git_data
199
+
200
+
107
201
  def get_css() -> str:
108
202
  """CSS for git info, share buttons, and copy button."""
109
203
  return """
@@ -212,6 +306,8 @@ def process_html(
212
306
  page_url: str,
213
307
  title: str,
214
308
  src_path: str | None = None,
309
+ git_data: dict[str, dict[str, Any]] | None = None,
310
+ repo_url: str | None = None,
215
311
  default_image: str | None = None,
216
312
  default_author: str | None = None,
217
313
  keywords: str | None = None,
@@ -389,15 +485,17 @@ def process_html(
389
485
  """
390
486
  soup.body.append(script)
391
487
 
392
- # Initialize git info with defaults
488
+ # Initialize git info with defaults and only call git when needed (authors or JSON-LD)
393
489
  git_info = {
394
490
  "creation_date": DEFAULT_CREATION_DATE,
395
491
  "last_modified_date": DEFAULT_MODIFIED_DATE,
396
492
  }
493
+ needs_git = (add_authors or add_json_ld) and src_path
397
494
 
398
- # Add git information if source path available
399
- if src_path:
400
- git_info = get_git_info(src_path, add_authors=add_authors, default_author=default_author)
495
+ if needs_git:
496
+ git_info = get_git_info(
497
+ src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
498
+ )
401
499
 
402
500
  # Only render git footer if we have real git history (not placeholder defaults)
403
501
  has_real_git_data = (
@@ -2,12 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import contextlib
6
5
  import re
7
- import subprocess
8
- from collections import Counter
6
+ import threading
9
7
  from datetime import datetime
10
8
  from pathlib import Path
9
+ from typing import Any
11
10
 
12
11
  import requests
13
12
  import yaml # YAML is used for its readability and consistency with MkDocs ecosystem
@@ -16,6 +15,11 @@ from bs4 import BeautifulSoup
16
15
  WARNING = "WARNING (mkdocs_ultralytics_plugin):"
17
16
  DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
18
17
 
18
+ # Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
19
+ _AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
20
+ _AUTHOR_CACHE_MTIME: float | None = None
21
+ _CACHE_LOCK = threading.Lock()
22
+
19
23
 
20
24
  def calculate_time_difference(date_string: str) -> tuple[str, str]:
21
25
  """Calculate the time difference between a given date and the current date in a human-readable format.
@@ -102,9 +106,10 @@ def get_github_username_from_email(
102
106
  you comply with GitHub's rate limits and authentication requirements when querying their API.
103
107
  """
104
108
  # First, check if the email exists in the local cache file
105
- if email in cache:
106
- return cache[email].get("username"), cache[email].get("avatar")
107
- elif not email.strip():
109
+ with _CACHE_LOCK:
110
+ if email in cache:
111
+ return cache[email].get("username"), cache[email].get("avatar")
112
+ if not email.strip():
108
113
  if verbose:
109
114
  print(f"{WARNING} No author found for {file_path}")
110
115
  return None, None
@@ -113,13 +118,15 @@ def get_github_username_from_email(
113
118
  if email.endswith("@users.noreply.github.com"):
114
119
  username = email.split("+")[-1].split("@")[0]
115
120
  avatar = f"https://github.com/{username}.png"
116
- cache[email] = {
117
- "username": username,
118
- "avatar": requests.head(avatar, allow_redirects=True).url,
119
- }
121
+ avatar_url = requests.head(avatar, allow_redirects=True).url
122
+ with _CACHE_LOCK:
123
+ cache[email] = {
124
+ "username": username,
125
+ "avatar": avatar_url,
126
+ }
120
127
  return username, avatar
121
128
 
122
- # If the email is not found in the cache, query GitHub REST API
129
+ # Fallback to GitHub REST API when not cached
123
130
  url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
124
131
  if verbose:
125
132
  print(f"Running GitHub REST API for author {email}")
@@ -129,20 +136,29 @@ def get_github_username_from_email(
129
136
  if data["total_count"] > 0:
130
137
  username = data["items"][0]["login"]
131
138
  avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
132
- cache[email] = {
133
- "username": username,
134
- "avatar": requests.head(avatar, allow_redirects=True).url,
135
- }
139
+ avatar_url = requests.head(avatar, allow_redirects=True).url
140
+ with _CACHE_LOCK:
141
+ cache[email] = {
142
+ "username": username,
143
+ "avatar": avatar_url,
144
+ }
136
145
  return username, avatar
137
146
 
138
147
  if verbose:
139
148
  print(f"{WARNING} No username found for {email}")
140
- cache[email] = {"username": None, "avatar": None}
149
+ with _CACHE_LOCK:
150
+ cache[email] = {"username": None, "avatar": None}
141
151
  return None, None
142
152
 
143
153
 
144
- def get_github_usernames_from_file(file_path: str, default_user: str | None = None) -> dict[str, dict[str, any]]:
145
- """Fetch GitHub usernames associated with a file using Git Log and Git Blame commands.
154
+ def get_github_usernames_from_file(
155
+ file_path: str,
156
+ default_user: str | None = None,
157
+ emails: dict[str, int] | None = None,
158
+ repo_url: str | None = None,
159
+ force_reload: bool = False,
160
+ ) -> dict[str, dict[str, Any]]:
161
+ """Fetch GitHub usernames associated with a file using provided Git email counts.
146
162
 
147
163
  Args:
148
164
  file_path (str): The path to the file for which GitHub usernames are to be retrieved.
@@ -157,66 +173,47 @@ def get_github_usernames_from_file(file_path: str, default_user: str | None = No
157
173
  - 'avatar' (str): The URL of the author's GitHub avatar.
158
174
 
159
175
  Examples:
160
- >>> print(get_github_usernames_from_file('mkdocs.yml'))
161
- {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 5, 'avatar': '...'}}
176
+ >>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
177
+ {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
162
178
  """
163
- # Fetch author emails using 'git log'
164
- try:
165
- authors_emails_log = (
166
- subprocess.check_output(["git", "log", "--pretty=format:%ae", Path(file_path).resolve()])
167
- .decode("utf-8")
168
- .split("\n")
169
- )
170
- emails = dict(Counter(authors_emails_log))
171
- except subprocess.CalledProcessError:
172
- emails = {} # Git not available or file not in git repo
173
-
174
- # Fetch author emails using 'git blame'
175
- with contextlib.suppress(Exception):
176
- authors_emails_blame = (
177
- subprocess.check_output(
178
- ["git", "blame", "--line-porcelain", Path(file_path).resolve()],
179
- stderr=subprocess.DEVNULL,
180
- )
181
- .decode("utf-8")
182
- .split("\n")
183
- )
184
- authors_emails_blame = [line.split(" ")[1] for line in authors_emails_blame if line.startswith("author-mail")]
185
- authors_emails_blame = [email.strip("<>") for email in authors_emails_blame]
186
- emails_blame = dict(Counter(authors_emails_blame))
187
-
188
- # Merge the two email lists, adding any missing authors from 'git blame' as a 1-commit change
189
- for email in emails_blame:
190
- if email not in emails:
191
- emails[email] = 1 # Only add new authors from 'git blame' with a 1-commit change
179
+ if emails is None:
180
+ emails = {}
181
+ else:
182
+ emails = dict(emails) # shallow copy to avoid mutating caller data
192
183
 
193
184
  # If no git info found but default_user provided, use default_user
194
185
  if not emails and default_user:
195
186
  emails[default_user] = 1
196
187
 
197
- # Load the local cache of GitHub usernames
188
+ # Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
198
189
  local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
199
- if local_cache_file.is_file():
200
- with local_cache_file.open("r") as f:
201
- cache = yaml.safe_load(f) or {}
202
- else:
203
- cache = {}
204
-
205
- try:
206
- github_repo_url = (
207
- subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode("utf-8").strip()
190
+ global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
191
+ with _CACHE_LOCK:
192
+ current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
193
+ needs_reload = (
194
+ force_reload
195
+ or _AUTHOR_CACHE is None
196
+ or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
208
197
  )
209
- if github_repo_url.endswith(".git"):
210
- github_repo_url = github_repo_url[:-4]
211
- if github_repo_url.startswith("git@"):
212
- github_repo_url = "https://" + github_repo_url[4:].replace(":", "/")
213
- except subprocess.CalledProcessError:
214
- github_repo_url = "https://github.com/ultralytics/ultralytics" # Fallback URL
198
+ if needs_reload:
199
+ if local_cache_file.is_file():
200
+ with local_cache_file.open("r") as f:
201
+ _AUTHOR_CACHE = yaml.safe_load(f) or {}
202
+ _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
203
+ else:
204
+ _AUTHOR_CACHE = {}
205
+ _AUTHOR_CACHE_MTIME = None
206
+ cache = _AUTHOR_CACHE
207
+
208
+ github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
215
209
 
216
210
  info = {}
211
+ cache_updated = False
217
212
  for email, changes in emails.items():
218
213
  if not email and default_user:
219
214
  email = default_user
215
+ was_cached = email in cache
216
+ prev_entry = cache.get(email)
220
217
  username, avatar = get_github_username_from_email(email, cache, file_path)
221
218
  # If we can't determine the user URL, revert to the GitHub file URL
222
219
  user_url = f"https://github.com/{username}" if username else github_repo_url
@@ -226,9 +223,14 @@ def get_github_usernames_from_file(file_path: str, default_user: str | None = No
226
223
  "changes": changes,
227
224
  "avatar": avatar or DEFAULT_AVATAR,
228
225
  }
229
-
230
- # Save the local cache of GitHub usernames and avatar URLs
231
- with local_cache_file.open("w") as f:
232
- yaml.safe_dump(cache, f)
226
+ cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
227
+
228
+ # Save the local cache of GitHub usernames and avatar URLs if updated
229
+ if cache_updated:
230
+ with _CACHE_LOCK:
231
+ _AUTHOR_CACHE = cache
232
+ with local_cache_file.open("w") as f:
233
+ yaml.safe_dump(cache, f)
234
+ _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
233
235
 
234
236
  return info
@@ -1,157 +0,0 @@
1
- # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
- """Postprocess MkDocs/Zensical site by adding metadata, git info, and social features."""
3
-
4
- from __future__ import annotations
5
-
6
- from pathlib import Path
7
-
8
- from plugin.processor import process_html
9
-
10
-
11
- def process_html_file(
12
- html_path: Path,
13
- site_dir: Path,
14
- md_index: dict[str, str],
15
- site_url: str = "",
16
- default_image: str | None = None,
17
- default_author: str | None = None,
18
- add_desc: bool = True,
19
- add_image: bool = True,
20
- add_keywords: bool = True,
21
- add_share_buttons: bool = True,
22
- add_authors: bool = False,
23
- add_json_ld: bool = False,
24
- add_css: bool = True,
25
- add_copy_llm: bool = True,
26
- verbose: bool = False,
27
- ) -> bool:
28
- """Process a single HTML file by delegating to shared processor.
29
-
30
- Returns:
31
- bool: True if file was successfully processed and written, False otherwise.
32
- """
33
- from bs4 import BeautifulSoup
34
-
35
- try:
36
- html = html_path.read_text(encoding="utf-8")
37
- except (UnicodeDecodeError, FileNotFoundError) as e:
38
- if verbose:
39
- print(f"Error reading {html_path}: {e}")
40
- return False
41
-
42
- soup = BeautifulSoup(html, "html.parser")
43
-
44
- # Get page URL - calculate relative path from site_dir
45
- rel_path = html_path.relative_to(site_dir).as_posix()
46
- page_url = f"{site_url.rstrip('/')}/{rel_path}".replace("/index.html", "/")
47
-
48
- # Get title
49
- title = soup.find("h1").text if soup.find("h1") else soup.title.string if soup.title else ""
50
-
51
- # Extract keywords from existing meta tag if present
52
- keywords = None
53
- if meta_keywords := soup.find("meta", attrs={"name": "keywords"}):
54
- keywords = meta_keywords.get("content")
55
-
56
- # Find source markdown file from prebuilt index using relative path
57
- html_rel = html_path.relative_to(site_dir).with_suffix("").as_posix()
58
- if html_rel.endswith("/index"):
59
- html_rel = html_rel[:-6] # Remove /index suffix
60
- src_path = md_index.get(html_rel or "index") or md_index.get(f"{html_rel}/index")
61
-
62
- # Process HTML
63
- processed_html = process_html(
64
- html=html,
65
- page_url=page_url,
66
- title=title,
67
- src_path=src_path,
68
- default_image=default_image,
69
- default_author=default_author,
70
- keywords=keywords,
71
- add_desc=add_desc,
72
- add_image=add_image,
73
- add_keywords=add_keywords,
74
- add_share_buttons=add_share_buttons,
75
- add_authors=add_authors,
76
- add_json_ld=add_json_ld,
77
- add_css=add_css,
78
- add_copy_llm=add_copy_llm,
79
- )
80
-
81
- # Write back
82
- try:
83
- html_path.write_text(processed_html, encoding="utf-8")
84
- if verbose:
85
- print(f"Processed: {html_path.relative_to(site_dir)}")
86
- return True
87
- except (OSError, PermissionError) as e:
88
- if verbose:
89
- print(f"Error writing {html_path}: {e}")
90
- return False
91
-
92
-
93
- def postprocess_site(
94
- site_dir: str | Path = "site",
95
- docs_dir: str | Path = "docs",
96
- site_url: str = "",
97
- default_image: str | None = None,
98
- default_author: str | None = None,
99
- add_desc: bool = True,
100
- add_image: bool = True,
101
- add_keywords: bool = True,
102
- add_share_buttons: bool = True,
103
- add_authors: bool = False,
104
- add_json_ld: bool = False,
105
- add_css: bool = True,
106
- add_copy_llm: bool = True,
107
- verbose: bool = True,
108
- ) -> None:
109
- """Process all HTML files in the site directory."""
110
- site_dir = Path(site_dir)
111
- docs_dir = Path(docs_dir)
112
-
113
- if not site_dir.exists():
114
- print(f"Site directory not found: {site_dir}")
115
- return
116
-
117
- html_files = list(site_dir.rglob("*.html"))
118
- if not html_files:
119
- print(f"No HTML files found in {site_dir}")
120
- return
121
-
122
- # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
123
- md_index = {}
124
- if docs_dir.exists():
125
- for md_file in docs_dir.rglob("*.md"):
126
- rel_path = md_file.relative_to(docs_dir).with_suffix("").as_posix()
127
- md_index[rel_path] = str(md_file)
128
-
129
- print(f"Processing {len(html_files)} HTML files in {site_dir}")
130
-
131
- processed = 0
132
- for html_file in html_files:
133
- success = process_html_file(
134
- html_file,
135
- site_dir,
136
- md_index,
137
- site_url=site_url,
138
- default_image=default_image,
139
- default_author=default_author,
140
- add_desc=add_desc,
141
- add_image=add_image,
142
- add_keywords=add_keywords,
143
- add_share_buttons=add_share_buttons,
144
- add_authors=add_authors,
145
- add_json_ld=add_json_ld,
146
- add_css=add_css,
147
- add_copy_llm=add_copy_llm,
148
- verbose=verbose,
149
- )
150
- if success:
151
- processed += 1
152
-
153
- print(f"✅ Postprocessing complete: {processed}/{len(html_files)} files processed")
154
-
155
-
156
- if __name__ == "__main__":
157
- postprocess_site()