PyPI - mkdocs-ultralytics-plugin - Version diff - 0.2.1.tar.gz → 0.2.3.tar.gz - Mend

mkdocs-ultralytics-plugin 0.2.1.tar.gz → 0.2.3.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

{mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mkdocs-ultralytics-plugin
-Version: 0.2.1
+Version: 0.2.3
 Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
 Author-email: Glenn Jocher <hello@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>

{mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mkdocs-ultralytics-plugin
-Version: 0.2.1
+Version: 0.2.3
 Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
 Author-email: Glenn Jocher <hello@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>

{mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-__version__ = "0.2.1"
+__version__ = "0.2.3"
 from .main import MetaPlugin
 from .postprocess import postprocess_site

{mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/main.py RENAMED Viewed

@@ -2,9 +2,12 @@
 from __future__ import annotations
+from pathlib import Path
 from mkdocs.config import config_options
 from mkdocs.plugins import BasePlugin
+import plugin.processor as processor
 from plugin.processor import process_html
@@ -26,6 +29,22 @@ class MetaPlugin(BasePlugin):
         ("add_copy_llm", config_options.Type(bool, default=True)),
     )
+    def __init__(self):
+        super().__init__()
+        self.git_repo_url = None
+        self.git_data = None
+    def on_config(self, config):
+        """Prepare git metadata once for all pages if authors/JSON-LD are enabled."""
+        if not self.config.get("enabled", True):
+            return config
+        if self.config.get("add_authors") or self.config.get("add_json_ld"):
+            docs_dir = Path(config["docs_dir"])
+            md_files = [str(p) for p in docs_dir.rglob("*.md")] if docs_dir.exists() else []
+            self.git_repo_url, self.git_data = processor.build_git_map(md_files)
+        return config
     def on_post_page(self, output: str, page, config) -> str:
         """Enhance HTML output by delegating to shared processor."""
         if not self.config["enabled"]:
@@ -47,6 +66,8 @@ class MetaPlugin(BasePlugin):
                 page_url=page_url,
                 title=title,
                 src_path=page.file.abs_src_path,
+                git_data=self.git_data,
+                repo_url=self.git_repo_url,
                 default_image=self.config["default_image"],
                 default_author=self.config["default_author"],
                 keywords=keywords,

mkdocs_ultralytics_plugin-0.2.3/plugin/postprocess.py ADDED Viewed

@@ -0,0 +1,255 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+"""Postprocess MkDocs/Zensical site by adding metadata, git info, and social features."""
+from __future__ import annotations
+import os
+from collections.abc import Callable
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Any
+try:
+    from ultralytics.utils import TQDM  # progress bars
+except ImportError:
+    TQDM = None
+import plugin.processor as processor
+from plugin.processor import process_html
+# Shared worker state for process pools (avoids re-pickling large read-only data per task)
+_WORKER_STATE: dict[str, Any] | None = None
+def _set_worker_state(state: dict[str, Any]) -> None:
+    global _WORKER_STATE
+    _WORKER_STATE = state
+def _process_file(html_file: Path) -> bool:
+    if _WORKER_STATE is None:
+        raise RuntimeError("Worker state not initialized")
+    return process_html_file(
+        html_file,
+        _WORKER_STATE["site_dir"],
+        _WORKER_STATE["md_index"],
+        _WORKER_STATE["git_data"],
+        _WORKER_STATE["repo_url"],
+        site_url=_WORKER_STATE["site_url"],
+        default_image=_WORKER_STATE["default_image"],
+        default_author=_WORKER_STATE["default_author"],
+        add_desc=_WORKER_STATE["add_desc"],
+        add_image=_WORKER_STATE["add_image"],
+        add_keywords=_WORKER_STATE["add_keywords"],
+        add_share_buttons=_WORKER_STATE["add_share_buttons"],
+        add_authors=_WORKER_STATE["add_authors"],
+        add_json_ld=_WORKER_STATE["add_json_ld"],
+        add_css=_WORKER_STATE["add_css"],
+        add_copy_llm=_WORKER_STATE["add_copy_llm"],
+        verbose=_WORKER_STATE["verbose"],
+        log=None,
+    )
+def process_html_file(
+    html_path: Path,
+    site_dir: Path,
+    md_index: dict[str, str],
+    git_data: dict[str, dict[str, str | dict]] | None,
+    repo_url: str | None,
+    site_url: str = "",
+    default_image: str | None = None,
+    default_author: str | None = None,
+    add_desc: bool = True,
+    add_image: bool = True,
+    add_keywords: bool = True,
+    add_share_buttons: bool = True,
+    add_authors: bool = False,
+    add_json_ld: bool = False,
+    add_css: bool = True,
+    add_copy_llm: bool = True,
+    verbose: bool = False,
+    log: Callable[[str], None] | None = print,
+) -> bool:
+    """Process a single HTML file by delegating to shared processor.
+    Returns:
+        bool: True if file was successfully processed and written, False otherwise.
+    """
+    from bs4 import BeautifulSoup
+    try:
+        html = html_path.read_text(encoding="utf-8")
+    except (UnicodeDecodeError, FileNotFoundError) as e:
+        if verbose and log:
+            log(f"Error reading {html_path}: {e}")
+        return False
+    soup = BeautifulSoup(html, "html.parser")
+    # Get page URL - calculate relative path from site_dir
+    rel_path = html_path.relative_to(site_dir).as_posix()
+    page_url = f"{site_url.rstrip('/')}/{rel_path}".replace("/index.html", "/")
+    # Get title
+    title = soup.find("h1").text if soup.find("h1") else soup.title.string if soup.title else ""
+    # Extract keywords from existing meta tag if present
+    keywords = None
+    if meta_keywords := soup.find("meta", attrs={"name": "keywords"}):
+        keywords = meta_keywords.get("content")
+    # Find source markdown file from prebuilt index using relative path
+    html_rel = html_path.relative_to(site_dir).with_suffix("").as_posix()
+    if html_rel.endswith("/index"):
+        html_rel = html_rel[:-6]  # Remove /index suffix
+    src_path = md_index.get(html_rel or "index") or md_index.get(f"{html_rel}/index")
+    # Process HTML
+    processed_html = process_html(
+        html=html,
+        page_url=page_url,
+        title=title,
+        src_path=src_path,
+        git_data=git_data,
+        repo_url=repo_url,
+        default_image=default_image,
+        default_author=default_author,
+        keywords=keywords,
+        add_desc=add_desc,
+        add_image=add_image,
+        add_keywords=add_keywords,
+        add_share_buttons=add_share_buttons,
+        add_authors=add_authors,
+        add_json_ld=add_json_ld,
+        add_css=add_css,
+        add_copy_llm=add_copy_llm,
+    )
+    # Write back
+    try:
+        html_path.write_text(processed_html, encoding="utf-8")
+        return True
+    except (OSError, PermissionError) as e:
+        if verbose and log:
+            log(f"Error writing {html_path}: {e}")
+        return False
+def postprocess_site(
+    site_dir: str | Path = "site",
+    docs_dir: str | Path = "docs",
+    site_url: str = "",
+    default_image: str | None = None,
+    default_author: str | None = None,
+    add_desc: bool = True,
+    add_image: bool = True,
+    add_keywords: bool = True,
+    add_share_buttons: bool = True,
+    add_authors: bool = False,
+    add_json_ld: bool = False,
+    add_css: bool = True,
+    add_copy_llm: bool = True,
+    verbose: bool = True,
+    use_processes: bool = True,
+    workers: int | None = None,
+) -> None:
+    """Process all HTML files in the site directory."""
+    site_dir = Path(site_dir)
+    docs_dir = Path(docs_dir)
+    if not site_dir.exists():
+        print(f"Site directory not found: {site_dir}")
+        return
+    html_files = list(site_dir.rglob("*.html"))
+    if not html_files:
+        print(f"No HTML files found in {site_dir}")
+        return
+    worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
+    # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
+    md_index = (
+        {md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
+        if docs_dir.exists()
+        else {}
+    )
+    mode = "process" if use_processes else "thread"
+    print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
+    processed = 0
+    repo_url = None
+    git_data = None
+    if (add_authors or add_json_ld) and md_index:
+        repo_url, git_data = processor.build_git_map(list(md_index.values()))
+    progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
+    # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
+    log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
+    task_kwargs = dict(
+        site_dir=site_dir,
+        md_index=md_index,
+        git_data=git_data,
+        repo_url=repo_url,
+        site_url=site_url,
+        default_image=default_image,
+        default_author=default_author,
+        add_desc=add_desc,
+        add_image=add_image,
+        add_keywords=add_keywords,
+        add_share_buttons=add_share_buttons,
+        add_authors=add_authors,
+        add_json_ld=add_json_ld,
+        add_css=add_css,
+        add_copy_llm=add_copy_llm,
+        verbose=verbose,
+    )
+    if worker_count == 1:
+        for html_file in html_files:
+            success = process_html_file(html_file, **task_kwargs, log=log_fn)
+            processed += bool(success)
+            if progress:
+                progress.update(1)
+    else:
+        if use_processes:
+            state = {**task_kwargs}
+            executor_context = ProcessPoolExecutor(
+                max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
+            )
+            def submit_fn(ex, f):
+                return ex.submit(_process_file, f)
+        else:
+            executor_context = ThreadPoolExecutor(max_workers=worker_count)
+            def submit_fn(ex, f):
+                return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
+        with executor_context as executor:
+            future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
+            for future in as_completed(future_to_file):
+                html_file = future_to_file[future]
+                try:
+                    success = future.result()
+                except Exception as e:
+                    success = False
+                    if verbose:
+                        (log_fn or print)(f"Error processing {html_file}: {e}")
+                if success:
+                    processed += 1
+                if progress:
+                    progress.update(1)
+    if progress:
+        progress.close()
+    print(f"✅ Postprocessing complete: {processed}/{len(html_files)} files processed")
+if __name__ == "__main__":
+    postprocess_site()

{mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/processor.py RENAMED Viewed

@@ -27,37 +27,47 @@ COPY_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d
 CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9 16.17L4.83 12l-1.42 1.41L9 19L21 7l-1.41-1.41L9 16.17z"></path></svg>'
-def get_git_info(file_path: str, add_authors: bool = True, default_author: str | None = None) -> dict[str, Any]:
-    """Retrieve git information including creation/modified dates and optional authors."""
+def get_git_info(
+    file_path: str,
+    add_authors: bool = True,
+    default_author: str | None = None,
+    git_data: dict[str, dict[str, Any]] | None = None,
+    repo_url: str | None = None,
+) -> dict[str, Any]:
+    """Retrieve git information (dates + optional authors) from precomputed git data."""
     file_path = str(Path(file_path).resolve())
     git_info = {
         "creation_date": DEFAULT_CREATION_DATE,
         "last_modified_date": DEFAULT_MODIFIED_DATE,
     }
-    try:
-        subprocess.check_output(["git", "rev-parse", "--is-inside-work-tree"], stderr=subprocess.DEVNULL)
-        creation_output = subprocess.check_output(
-            ["git", "log", "--reverse", "--pretty=format:%ai", file_path]
-        ).decode()
-        creation_date = creation_output.split("\n")[0] if creation_output else ""
-        last_modified_date = subprocess.check_output(["git", "log", "-1", "--pretty=format:%ai", file_path]).decode()
-        git_info.update(
-            {
-                "creation_date": creation_date or DEFAULT_CREATION_DATE,
-                "last_modified_date": last_modified_date or DEFAULT_MODIFIED_DATE,
-            }
-        )
+    if not git_data or file_path not in git_data:
+        return git_info
-        if add_authors:
-            authors_info = get_github_usernames_from_file(file_path, default_user=default_author)
-            git_info["authors"] = sorted(
-                [(author, info["url"], info["changes"], info["avatar"]) for author, info in authors_info.items()],
-                key=lambda x: x[2],
-                reverse=True,
-            )
-    except (subprocess.CalledProcessError, FileNotFoundError):
-        pass
+    cached = git_data[file_path]
+    git_info.update(
+        {
+            "creation_date": cached.get("creation_date", DEFAULT_CREATION_DATE),
+            "last_modified_date": cached.get("last_modified_date", DEFAULT_MODIFIED_DATE),
+        }
+    )
+    if add_authors and cached.get("emails"):
+        git_info["authors"] = sorted(
+            [
+                (
+                    author,
+                    info["url"],
+                    info["changes"],
+                    info["avatar"],
+                )
+                for author, info in get_github_usernames_from_file(
+                    file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
+                ).items()
+            ],
+            key=lambda x: x[2],
+            reverse=True,
+        )
     return git_info
@@ -104,6 +114,90 @@ def insert_content(soup: BeautifulSoup, content_to_insert) -> None:
         md_typeset.append(content_to_insert)
+def build_git_map(file_paths: list[str] | list[Path]) -> tuple[str | None, dict[str, dict[str, Any]]]:
+    """Build git metadata for provided files using a single git log pass."""
+    git_data: dict[str, dict[str, Any]] = {}
+    repo_url: str | None = None
+    if not file_paths:
+        return repo_url, git_data
+    try:
+        repo_root = Path(
+            subprocess.check_output(["git", "rev-parse", "--show-toplevel"], stderr=subprocess.DEVNULL).decode().strip()
+        )
+    except subprocess.CalledProcessError:
+        return repo_url, git_data
+    try:
+        github_repo_url = subprocess.check_output(
+            ["git", "-C", str(repo_root), "config", "--get", "remote.origin.url"], stderr=subprocess.DEVNULL
+        ).decode("utf-8")
+        github_repo_url = github_repo_url.strip()
+        if github_repo_url.endswith(".git"):
+            github_repo_url = github_repo_url[:-4]
+        if github_repo_url.startswith("git@"):
+            github_repo_url = "https://" + github_repo_url[4:].replace(":", "/")
+        repo_url = github_repo_url or None
+    except subprocess.CalledProcessError:
+        repo_url = None
+    rel_paths = []
+    for fp in file_paths:
+        path = Path(fp)
+        if path.exists():
+            try:
+                rel_paths.append(path.resolve().relative_to(repo_root))
+            except ValueError:
+                continue
+    if not rel_paths:
+        return repo_url, git_data
+    cmd = [
+        "git",
+        "-C",
+        str(repo_root),
+        "log",
+        "--name-only",
+        "--pretty=format:%ad\t%ae",
+        "--date=format:%Y-%m-%d %H:%M:%S %z",
+        "--",
+        *[str(p) for p in rel_paths],
+    ]
+    try:
+        output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode().splitlines()
+    except subprocess.CalledProcessError:
+        return repo_url, git_data
+    current_date = None
+    current_email = None
+    for line in output:
+        if not line.strip():
+            continue
+        parts = line.split("\t")
+        if len(parts) == 2:
+            current_date, current_email = parts
+            continue
+        if current_date and current_email:
+            abs_path = (repo_root / line.strip()).resolve()
+            key = str(abs_path)
+            entry = git_data.setdefault(
+                key,
+                {
+                    "creation_date": current_date,
+                    "last_modified_date": current_date,
+                    "emails": {},
+                },
+            )
+            entry.setdefault("last_modified_date", current_date)
+            entry["creation_date"] = current_date
+            entry["emails"][current_email] = entry["emails"].get(current_email, 0) + 1
+    return repo_url, git_data
 def get_css() -> str:
     """CSS for git info, share buttons, and copy button."""
     return """
@@ -212,6 +306,8 @@ def process_html(
     page_url: str,
     title: str,
     src_path: str | None = None,
+    git_data: dict[str, dict[str, Any]] | None = None,
+    repo_url: str | None = None,
     default_image: str | None = None,
     default_author: str | None = None,
     keywords: str | None = None,
@@ -389,15 +485,17 @@ def process_html(
             """
             soup.body.append(script)
-    # Initialize git info with defaults
+    # Initialize git info with defaults and only call git when needed (authors or JSON-LD)
     git_info = {
         "creation_date": DEFAULT_CREATION_DATE,
         "last_modified_date": DEFAULT_MODIFIED_DATE,
     }
+    needs_git = (add_authors or add_json_ld) and src_path
-    # Add git information if source path available
-    if src_path:
-        git_info = get_git_info(src_path, add_authors=add_authors, default_author=default_author)
+    if needs_git:
+        git_info = get_git_info(
+            src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
+        )
         # Only render git footer if we have real git history (not placeholder defaults)
         has_real_git_data = (

{mkdocs_ultralytics_plugin-0.2.1 → mkdocs_ultralytics_plugin-0.2.3}/plugin/utils.py RENAMED Viewed

@@ -2,12 +2,11 @@
 from __future__ import annotations
-import contextlib
 import re
-import subprocess
-from collections import Counter
+import threading
 from datetime import datetime
 from pathlib import Path
+from typing import Any
 import requests
 import yaml  # YAML is used for its readability and consistency with MkDocs ecosystem
@@ -16,6 +15,11 @@ from bs4 import BeautifulSoup
 WARNING = "WARNING (mkdocs_ultralytics_plugin):"
 DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
+# Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
+_AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
+_AUTHOR_CACHE_MTIME: float | None = None
+_CACHE_LOCK = threading.Lock()
 def calculate_time_difference(date_string: str) -> tuple[str, str]:
     """Calculate the time difference between a given date and the current date in a human-readable format.
@@ -102,9 +106,10 @@ def get_github_username_from_email(
         you comply with GitHub's rate limits and authentication requirements when querying their API.
     """
     # First, check if the email exists in the local cache file
-    if email in cache:
-        return cache[email].get("username"), cache[email].get("avatar")
-    elif not email.strip():
+    with _CACHE_LOCK:
+        if email in cache:
+            return cache[email].get("username"), cache[email].get("avatar")
+    if not email.strip():
         if verbose:
             print(f"{WARNING} No author found for {file_path}")
         return None, None
@@ -113,13 +118,15 @@ def get_github_username_from_email(
     if email.endswith("@users.noreply.github.com"):
         username = email.split("+")[-1].split("@")[0]
         avatar = f"https://github.com/{username}.png"
-        cache[email] = {
-            "username": username,
-            "avatar": requests.head(avatar, allow_redirects=True).url,
-        }
+        avatar_url = requests.head(avatar, allow_redirects=True).url
+        with _CACHE_LOCK:
+            cache[email] = {
+                "username": username,
+                "avatar": avatar_url,
+            }
         return username, avatar
-    # If the email is not found in the cache, query GitHub REST API
+    # Fallback to GitHub REST API when not cached
     url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
     if verbose:
         print(f"Running GitHub REST API for author {email}")
@@ -129,20 +136,29 @@ def get_github_username_from_email(
         if data["total_count"] > 0:
             username = data["items"][0]["login"]
             avatar = data["items"][0]["avatar_url"]  # avatar_url key is correct here
-            cache[email] = {
-                "username": username,
-                "avatar": requests.head(avatar, allow_redirects=True).url,
-            }
+            avatar_url = requests.head(avatar, allow_redirects=True).url
+            with _CACHE_LOCK:
+                cache[email] = {
+                    "username": username,
+                    "avatar": avatar_url,
+                }
             return username, avatar
     if verbose:
         print(f"{WARNING} No username found for {email}")
-    cache[email] = {"username": None, "avatar": None}
+    with _CACHE_LOCK:
+        cache[email] = {"username": None, "avatar": None}
     return None, None
-def get_github_usernames_from_file(file_path: str, default_user: str | None = None) -> dict[str, dict[str, any]]:
-    """Fetch GitHub usernames associated with a file using Git Log and Git Blame commands.
+def get_github_usernames_from_file(
+    file_path: str,
+    default_user: str | None = None,
+    emails: dict[str, int] | None = None,
+    repo_url: str | None = None,
+    force_reload: bool = False,
+) -> dict[str, dict[str, Any]]:
+    """Fetch GitHub usernames associated with a file using provided Git email counts.
     Args:
         file_path (str): The path to the file for which GitHub usernames are to be retrieved.
@@ -157,66 +173,47 @@ def get_github_usernames_from_file(file_path: str, default_user: str | None = No
             - 'avatar' (str): The URL of the author's GitHub avatar.
     Examples:
-        >>> print(get_github_usernames_from_file('mkdocs.yml'))
-        {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 5, 'avatar': '...'}}
+        >>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
+        {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
     """
-    # Fetch author emails using 'git log'
-    try:
-        authors_emails_log = (
-            subprocess.check_output(["git", "log", "--pretty=format:%ae", Path(file_path).resolve()])
-            .decode("utf-8")
-            .split("\n")
-        )
-        emails = dict(Counter(authors_emails_log))
-    except subprocess.CalledProcessError:
-        emails = {}  # Git not available or file not in git repo
-    # Fetch author emails using 'git blame'
-    with contextlib.suppress(Exception):
-        authors_emails_blame = (
-            subprocess.check_output(
-                ["git", "blame", "--line-porcelain", Path(file_path).resolve()],
-                stderr=subprocess.DEVNULL,
-            )
-            .decode("utf-8")
-            .split("\n")
-        )
-        authors_emails_blame = [line.split(" ")[1] for line in authors_emails_blame if line.startswith("author-mail")]
-        authors_emails_blame = [email.strip("<>") for email in authors_emails_blame]
-        emails_blame = dict(Counter(authors_emails_blame))
-        # Merge the two email lists, adding any missing authors from 'git blame' as a 1-commit change
-        for email in emails_blame:
-            if email not in emails:
-                emails[email] = 1  # Only add new authors from 'git blame' with a 1-commit change
+    if emails is None:
+        emails = {}
+    else:
+        emails = dict(emails)  # shallow copy to avoid mutating caller data
     # If no git info found but default_user provided, use default_user
     if not emails and default_user:
         emails[default_user] = 1
-    # Load the local cache of GitHub usernames
+    # Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
     local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
-    if local_cache_file.is_file():
-        with local_cache_file.open("r") as f:
-            cache = yaml.safe_load(f) or {}
-    else:
-        cache = {}
-    try:
-        github_repo_url = (
-            subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode("utf-8").strip()
+    global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
+    with _CACHE_LOCK:
+        current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
+        needs_reload = (
+            force_reload
+            or _AUTHOR_CACHE is None
+            or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
         )
-        if github_repo_url.endswith(".git"):
-            github_repo_url = github_repo_url[:-4]
-        if github_repo_url.startswith("git@"):
-            github_repo_url = "https://" + github_repo_url[4:].replace(":", "/")
-    except subprocess.CalledProcessError:
-        github_repo_url = "https://github.com/ultralytics/ultralytics"  # Fallback URL
+        if needs_reload:
+            if local_cache_file.is_file():
+                with local_cache_file.open("r") as f:
+                    _AUTHOR_CACHE = yaml.safe_load(f) or {}
+                _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
+            else:
+                _AUTHOR_CACHE = {}
+                _AUTHOR_CACHE_MTIME = None
+        cache = _AUTHOR_CACHE
+    github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
     info = {}
+    cache_updated = False
     for email, changes in emails.items():
         if not email and default_user:
             email = default_user
+        was_cached = email in cache
+        prev_entry = cache.get(email)
         username, avatar = get_github_username_from_email(email, cache, file_path)
         # If we can't determine the user URL, revert to the GitHub file URL
         user_url = f"https://github.com/{username}" if username else github_repo_url
@@ -226,9 +223,14 @@ def get_github_usernames_from_file(file_path: str, default_user: str | None = No
             "changes": changes,
             "avatar": avatar or DEFAULT_AVATAR,
         }
-    # Save the local cache of GitHub usernames and avatar URLs
-    with local_cache_file.open("w") as f:
-        yaml.safe_dump(cache, f)
+        cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
+    # Save the local cache of GitHub usernames and avatar URLs if updated
+    if cache_updated:
+        with _CACHE_LOCK:
+            _AUTHOR_CACHE = cache
+            with local_cache_file.open("w") as f:
+                yaml.safe_dump(cache, f)
+            _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
     return info

mkdocs_ultralytics_plugin-0.2.1/plugin/postprocess.py DELETED Viewed

@@ -1,157 +0,0 @@
-# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-"""Postprocess MkDocs/Zensical site by adding metadata, git info, and social features."""
-from __future__ import annotations
-from pathlib import Path
-from plugin.processor import process_html
-def process_html_file(
-    html_path: Path,
-    site_dir: Path,
-    md_index: dict[str, str],
-    site_url: str = "",
-    default_image: str | None = None,
-    default_author: str | None = None,
-    add_desc: bool = True,
-    add_image: bool = True,
-    add_keywords: bool = True,
-    add_share_buttons: bool = True,
-    add_authors: bool = False,
-    add_json_ld: bool = False,
-    add_css: bool = True,
-    add_copy_llm: bool = True,
-    verbose: bool = False,
-) -> bool:
-    """Process a single HTML file by delegating to shared processor.
-    Returns:
-        bool: True if file was successfully processed and written, False otherwise.
-    """
-    from bs4 import BeautifulSoup
-    try:
-        html = html_path.read_text(encoding="utf-8")
-    except (UnicodeDecodeError, FileNotFoundError) as e:
-        if verbose:
-            print(f"Error reading {html_path}: {e}")
-        return False
-    soup = BeautifulSoup(html, "html.parser")
-    # Get page URL - calculate relative path from site_dir
-    rel_path = html_path.relative_to(site_dir).as_posix()
-    page_url = f"{site_url.rstrip('/')}/{rel_path}".replace("/index.html", "/")
-    # Get title
-    title = soup.find("h1").text if soup.find("h1") else soup.title.string if soup.title else ""
-    # Extract keywords from existing meta tag if present
-    keywords = None
-    if meta_keywords := soup.find("meta", attrs={"name": "keywords"}):
-        keywords = meta_keywords.get("content")
-    # Find source markdown file from prebuilt index using relative path
-    html_rel = html_path.relative_to(site_dir).with_suffix("").as_posix()
-    if html_rel.endswith("/index"):
-        html_rel = html_rel[:-6]  # Remove /index suffix
-    src_path = md_index.get(html_rel or "index") or md_index.get(f"{html_rel}/index")
-    # Process HTML
-    processed_html = process_html(
-        html=html,
-        page_url=page_url,
-        title=title,
-        src_path=src_path,
-        default_image=default_image,
-        default_author=default_author,
-        keywords=keywords,
-        add_desc=add_desc,
-        add_image=add_image,
-        add_keywords=add_keywords,
-        add_share_buttons=add_share_buttons,
-        add_authors=add_authors,
-        add_json_ld=add_json_ld,
-        add_css=add_css,
-        add_copy_llm=add_copy_llm,
-    )
-    # Write back
-    try:
-        html_path.write_text(processed_html, encoding="utf-8")
-        if verbose:
-            print(f"Processed: {html_path.relative_to(site_dir)}")
-        return True
-    except (OSError, PermissionError) as e:
-        if verbose:
-            print(f"Error writing {html_path}: {e}")
-        return False
-def postprocess_site(
-    site_dir: str | Path = "site",
-    docs_dir: str | Path = "docs",
-    site_url: str = "",
-    default_image: str | None = None,
-    default_author: str | None = None,
-    add_desc: bool = True,
-    add_image: bool = True,
-    add_keywords: bool = True,
-    add_share_buttons: bool = True,
-    add_authors: bool = False,
-    add_json_ld: bool = False,
-    add_css: bool = True,
-    add_copy_llm: bool = True,
-    verbose: bool = True,
-) -> None:
-    """Process all HTML files in the site directory."""
-    site_dir = Path(site_dir)
-    docs_dir = Path(docs_dir)
-    if not site_dir.exists():
-        print(f"Site directory not found: {site_dir}")
-        return
-    html_files = list(site_dir.rglob("*.html"))
-    if not html_files:
-        print(f"No HTML files found in {site_dir}")
-        return
-    # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
-    md_index = {}
-    if docs_dir.exists():
-        for md_file in docs_dir.rglob("*.md"):
-            rel_path = md_file.relative_to(docs_dir).with_suffix("").as_posix()
-            md_index[rel_path] = str(md_file)
-    print(f"Processing {len(html_files)} HTML files in {site_dir}")
-    processed = 0
-    for html_file in html_files:
-        success = process_html_file(
-            html_file,
-            site_dir,
-            md_index,
-            site_url=site_url,
-            default_image=default_image,
-            default_author=default_author,
-            add_desc=add_desc,
-            add_image=add_image,
-            add_keywords=add_keywords,
-            add_share_buttons=add_share_buttons,
-            add_authors=add_authors,
-            add_json_ld=add_json_ld,
-            add_css=add_css,
-            add_copy_llm=add_copy_llm,
-            verbose=verbose,
-        )
-        if success:
-            processed += 1
-    print(f"✅ Postprocessing complete: {processed}/{len(html_files)} files processed")
-if __name__ == "__main__":
-    postprocess_site()