PyPI - mkdocs-ultralytics-plugin - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

mkdocs-ultralytics-plugin 0.2.2.tar.gz → 0.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mkdocs-ultralytics-plugin
-Version: 0.2.2
+Version: 0.2.3
 Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
 Author-email: Glenn Jocher <hello@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>

{mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mkdocs-ultralytics-plugin
-Version: 0.2.2
+Version: 0.2.3
 Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
 Author-email: Glenn Jocher <hello@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>

{mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
-__version__ = "0.2.2"
+__version__ = "0.2.3"
 from .main import MetaPlugin
 from .postprocess import postprocess_site

{mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/postprocess.py RENAMED Viewed

@@ -3,8 +3,11 @@
 from __future__ import annotations
+import os
 from collections.abc import Callable
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
 from pathlib import Path
+from typing import Any
 try:
     from ultralytics.utils import TQDM  # progress bars
@@ -14,6 +17,39 @@ except ImportError:
 import plugin.processor as processor
 from plugin.processor import process_html
+# Shared worker state for process pools (avoids re-pickling large read-only data per task)
+_WORKER_STATE: dict[str, Any] | None = None
+def _set_worker_state(state: dict[str, Any]) -> None:
+    global _WORKER_STATE
+    _WORKER_STATE = state
+def _process_file(html_file: Path) -> bool:
+    if _WORKER_STATE is None:
+        raise RuntimeError("Worker state not initialized")
+    return process_html_file(
+        html_file,
+        _WORKER_STATE["site_dir"],
+        _WORKER_STATE["md_index"],
+        _WORKER_STATE["git_data"],
+        _WORKER_STATE["repo_url"],
+        site_url=_WORKER_STATE["site_url"],
+        default_image=_WORKER_STATE["default_image"],
+        default_author=_WORKER_STATE["default_author"],
+        add_desc=_WORKER_STATE["add_desc"],
+        add_image=_WORKER_STATE["add_image"],
+        add_keywords=_WORKER_STATE["add_keywords"],
+        add_share_buttons=_WORKER_STATE["add_share_buttons"],
+        add_authors=_WORKER_STATE["add_authors"],
+        add_json_ld=_WORKER_STATE["add_json_ld"],
+        add_css=_WORKER_STATE["add_css"],
+        add_copy_llm=_WORKER_STATE["add_copy_llm"],
+        verbose=_WORKER_STATE["verbose"],
+        log=None,
+    )
 def process_html_file(
     html_path: Path,
@@ -115,6 +151,8 @@ def postprocess_site(
     add_css: bool = True,
     add_copy_llm: bool = True,
     verbose: bool = True,
+    use_processes: bool = True,
+    workers: int | None = None,
 ) -> None:
     """Process all HTML files in the site directory."""
     site_dir = Path(site_dir)
@@ -129,14 +167,17 @@ def postprocess_site(
         print(f"No HTML files found in {site_dir}")
         return
+    worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
     # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
-    md_index = {}
-    if docs_dir.exists():
-        for md_file in docs_dir.rglob("*.md"):
-            rel_path = md_file.relative_to(docs_dir).with_suffix("").as_posix()
-            md_index[rel_path] = str(md_file)
+    md_index = (
+        {md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
+        if docs_dir.exists()
+        else {}
+    )
-    print(f"Processing {len(html_files)} HTML files in {site_dir}")
+    mode = "process" if use_processes else "thread"
+    print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
     processed = 0
     repo_url = None
@@ -144,32 +185,66 @@ def postprocess_site(
     if (add_authors or add_json_ld) and md_index:
         repo_url, git_data = processor.build_git_map(list(md_index.values()))
-    progress = TQDM(html_files, desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
-    log_fn = (progress.write if verbose and progress else print) if verbose else None
-    iterator = progress if progress else html_files
-    for html_file in iterator:
-        success = process_html_file(
-            html_file,
-            site_dir,
-            md_index,
-            git_data,
-            repo_url,
-            site_url=site_url,
-            default_image=default_image,
-            default_author=default_author,
-            add_desc=add_desc,
-            add_image=add_image,
-            add_keywords=add_keywords,
-            add_share_buttons=add_share_buttons,
-            add_authors=add_authors,
-            add_json_ld=add_json_ld,
-            add_css=add_css,
-            add_copy_llm=add_copy_llm,
-            verbose=verbose,
-            log=log_fn,
-        )
-        if success:
-            processed += 1
+    progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
+    # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
+    log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
+    task_kwargs = dict(
+        site_dir=site_dir,
+        md_index=md_index,
+        git_data=git_data,
+        repo_url=repo_url,
+        site_url=site_url,
+        default_image=default_image,
+        default_author=default_author,
+        add_desc=add_desc,
+        add_image=add_image,
+        add_keywords=add_keywords,
+        add_share_buttons=add_share_buttons,
+        add_authors=add_authors,
+        add_json_ld=add_json_ld,
+        add_css=add_css,
+        add_copy_llm=add_copy_llm,
+        verbose=verbose,
+    )
+    if worker_count == 1:
+        for html_file in html_files:
+            success = process_html_file(html_file, **task_kwargs, log=log_fn)
+            processed += bool(success)
+            if progress:
+                progress.update(1)
+    else:
+        if use_processes:
+            state = {**task_kwargs}
+            executor_context = ProcessPoolExecutor(
+                max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
+            )
+            def submit_fn(ex, f):
+                return ex.submit(_process_file, f)
+        else:
+            executor_context = ThreadPoolExecutor(max_workers=worker_count)
+            def submit_fn(ex, f):
+                return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
+        with executor_context as executor:
+            future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
+            for future in as_completed(future_to_file):
+                html_file = future_to_file[future]
+                try:
+                    success = future.result()
+                except Exception as e:
+                    success = False
+                    if verbose:
+                        (log_fn or print)(f"Error processing {html_file}: {e}")
+                if success:
+                    processed += 1
+                if progress:
+                    progress.update(1)
     if progress:
         progress.close()

{mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/utils.py RENAMED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import re
+import threading
 from datetime import datetime
 from pathlib import Path
 from typing import Any
@@ -14,6 +15,11 @@ from bs4 import BeautifulSoup
 WARNING = "WARNING (mkdocs_ultralytics_plugin):"
 DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
+# Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
+_AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
+_AUTHOR_CACHE_MTIME: float | None = None
+_CACHE_LOCK = threading.Lock()
 def calculate_time_difference(date_string: str) -> tuple[str, str]:
     """Calculate the time difference between a given date and the current date in a human-readable format.
@@ -100,9 +106,10 @@ def get_github_username_from_email(
         you comply with GitHub's rate limits and authentication requirements when querying their API.
     """
     # First, check if the email exists in the local cache file
-    if email in cache:
-        return cache[email].get("username"), cache[email].get("avatar")
-    elif not email.strip():
+    with _CACHE_LOCK:
+        if email in cache:
+            return cache[email].get("username"), cache[email].get("avatar")
+    if not email.strip():
         if verbose:
             print(f"{WARNING} No author found for {file_path}")
         return None, None
@@ -111,13 +118,15 @@ def get_github_username_from_email(
     if email.endswith("@users.noreply.github.com"):
         username = email.split("+")[-1].split("@")[0]
         avatar = f"https://github.com/{username}.png"
-        cache[email] = {
-            "username": username,
-            "avatar": requests.head(avatar, allow_redirects=True).url,
-        }
+        avatar_url = requests.head(avatar, allow_redirects=True).url
+        with _CACHE_LOCK:
+            cache[email] = {
+                "username": username,
+                "avatar": avatar_url,
+            }
         return username, avatar
-    # If the email is not found in the cache, query GitHub REST API
+    # Fallback to GitHub REST API when not cached
     url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
     if verbose:
         print(f"Running GitHub REST API for author {email}")
@@ -127,15 +136,18 @@ def get_github_username_from_email(
         if data["total_count"] > 0:
             username = data["items"][0]["login"]
             avatar = data["items"][0]["avatar_url"]  # avatar_url key is correct here
-            cache[email] = {
-                "username": username,
-                "avatar": requests.head(avatar, allow_redirects=True).url,
-            }
+            avatar_url = requests.head(avatar, allow_redirects=True).url
+            with _CACHE_LOCK:
+                cache[email] = {
+                    "username": username,
+                    "avatar": avatar_url,
+                }
             return username, avatar
     if verbose:
         print(f"{WARNING} No username found for {email}")
-    cache[email] = {"username": None, "avatar": None}
+    with _CACHE_LOCK:
+        cache[email] = {"username": None, "avatar": None}
     return None, None
@@ -144,6 +156,7 @@ def get_github_usernames_from_file(
     default_user: str | None = None,
     emails: dict[str, int] | None = None,
     repo_url: str | None = None,
+    force_reload: bool = False,
 ) -> dict[str, dict[str, Any]]:
     """Fetch GitHub usernames associated with a file using provided Git email counts.
@@ -172,20 +185,35 @@ def get_github_usernames_from_file(
     if not emails and default_user:
         emails[default_user] = 1
-    # Load the local cache of GitHub usernames
+    # Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
     local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
-    if local_cache_file.is_file():
-        with local_cache_file.open("r") as f:
-            cache = yaml.safe_load(f) or {}
-    else:
-        cache = {}
+    global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
+    with _CACHE_LOCK:
+        current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
+        needs_reload = (
+            force_reload
+            or _AUTHOR_CACHE is None
+            or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
+        )
+        if needs_reload:
+            if local_cache_file.is_file():
+                with local_cache_file.open("r") as f:
+                    _AUTHOR_CACHE = yaml.safe_load(f) or {}
+                _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
+            else:
+                _AUTHOR_CACHE = {}
+                _AUTHOR_CACHE_MTIME = None
+        cache = _AUTHOR_CACHE
     github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
     info = {}
+    cache_updated = False
     for email, changes in emails.items():
         if not email and default_user:
             email = default_user
+        was_cached = email in cache
+        prev_entry = cache.get(email)
         username, avatar = get_github_username_from_email(email, cache, file_path)
         # If we can't determine the user URL, revert to the GitHub file URL
         user_url = f"https://github.com/{username}" if username else github_repo_url
@@ -195,9 +223,14 @@ def get_github_usernames_from_file(
             "changes": changes,
             "avatar": avatar or DEFAULT_AVATAR,
         }
-    # Save the local cache of GitHub usernames and avatar URLs
-    with local_cache_file.open("w") as f:
-        yaml.safe_dump(cache, f)
+        cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
+    # Save the local cache of GitHub usernames and avatar URLs if updated
+    if cache_updated:
+        with _CACHE_LOCK:
+            _AUTHOR_CACHE = cache
+            with local_cache_file.open("w") as f:
+                yaml.safe_dump(cache, f)
+            _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
     return info