mkdocs-ultralytics-plugin 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/PKG-INFO +1 -1
  2. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
  3. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/__init__.py +1 -1
  4. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/postprocess.py +107 -32
  5. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/utils.py +56 -23
  6. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/LICENSE +0 -0
  7. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/README.md +0 -0
  8. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
  9. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
  10. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
  11. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
  12. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
  13. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/main.py +0 -0
  14. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/processor.py +0 -0
  15. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/pyproject.toml +0 -0
  16. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- __version__ = "0.2.2"
3
+ __version__ = "0.2.3"
4
4
 
5
5
  from .main import MetaPlugin
6
6
  from .postprocess import postprocess_site
@@ -3,8 +3,11 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
+ import os
6
7
  from collections.abc import Callable
8
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
7
9
  from pathlib import Path
10
+ from typing import Any
8
11
 
9
12
  try:
10
13
  from ultralytics.utils import TQDM # progress bars
@@ -14,6 +17,39 @@ except ImportError:
14
17
  import plugin.processor as processor
15
18
  from plugin.processor import process_html
16
19
 
20
+ # Shared worker state for process pools (avoids re-pickling large read-only data per task)
21
+ _WORKER_STATE: dict[str, Any] | None = None
22
+
23
+
24
+ def _set_worker_state(state: dict[str, Any]) -> None:
25
+ global _WORKER_STATE
26
+ _WORKER_STATE = state
27
+
28
+
29
+ def _process_file(html_file: Path) -> bool:
30
+ if _WORKER_STATE is None:
31
+ raise RuntimeError("Worker state not initialized")
32
+ return process_html_file(
33
+ html_file,
34
+ _WORKER_STATE["site_dir"],
35
+ _WORKER_STATE["md_index"],
36
+ _WORKER_STATE["git_data"],
37
+ _WORKER_STATE["repo_url"],
38
+ site_url=_WORKER_STATE["site_url"],
39
+ default_image=_WORKER_STATE["default_image"],
40
+ default_author=_WORKER_STATE["default_author"],
41
+ add_desc=_WORKER_STATE["add_desc"],
42
+ add_image=_WORKER_STATE["add_image"],
43
+ add_keywords=_WORKER_STATE["add_keywords"],
44
+ add_share_buttons=_WORKER_STATE["add_share_buttons"],
45
+ add_authors=_WORKER_STATE["add_authors"],
46
+ add_json_ld=_WORKER_STATE["add_json_ld"],
47
+ add_css=_WORKER_STATE["add_css"],
48
+ add_copy_llm=_WORKER_STATE["add_copy_llm"],
49
+ verbose=_WORKER_STATE["verbose"],
50
+ log=None,
51
+ )
52
+
17
53
 
18
54
  def process_html_file(
19
55
  html_path: Path,
@@ -115,6 +151,8 @@ def postprocess_site(
115
151
  add_css: bool = True,
116
152
  add_copy_llm: bool = True,
117
153
  verbose: bool = True,
154
+ use_processes: bool = True,
155
+ workers: int | None = None,
118
156
  ) -> None:
119
157
  """Process all HTML files in the site directory."""
120
158
  site_dir = Path(site_dir)
@@ -129,14 +167,17 @@ def postprocess_site(
129
167
  print(f"No HTML files found in {site_dir}")
130
168
  return
131
169
 
170
+ worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
171
+
132
172
  # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
133
- md_index = {}
134
- if docs_dir.exists():
135
- for md_file in docs_dir.rglob("*.md"):
136
- rel_path = md_file.relative_to(docs_dir).with_suffix("").as_posix()
137
- md_index[rel_path] = str(md_file)
173
+ md_index = (
174
+ {md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
175
+ if docs_dir.exists()
176
+ else {}
177
+ )
138
178
 
139
- print(f"Processing {len(html_files)} HTML files in {site_dir}")
179
+ mode = "process" if use_processes else "thread"
180
+ print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
140
181
 
141
182
  processed = 0
142
183
  repo_url = None
@@ -144,32 +185,66 @@ def postprocess_site(
144
185
  if (add_authors or add_json_ld) and md_index:
145
186
  repo_url, git_data = processor.build_git_map(list(md_index.values()))
146
187
 
147
- progress = TQDM(html_files, desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
148
- log_fn = (progress.write if verbose and progress else print) if verbose else None
149
- iterator = progress if progress else html_files
150
- for html_file in iterator:
151
- success = process_html_file(
152
- html_file,
153
- site_dir,
154
- md_index,
155
- git_data,
156
- repo_url,
157
- site_url=site_url,
158
- default_image=default_image,
159
- default_author=default_author,
160
- add_desc=add_desc,
161
- add_image=add_image,
162
- add_keywords=add_keywords,
163
- add_share_buttons=add_share_buttons,
164
- add_authors=add_authors,
165
- add_json_ld=add_json_ld,
166
- add_css=add_css,
167
- add_copy_llm=add_copy_llm,
168
- verbose=verbose,
169
- log=log_fn,
170
- )
171
- if success:
172
- processed += 1
188
+ progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
189
+ # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
190
+ log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
191
+
192
+ task_kwargs = dict(
193
+ site_dir=site_dir,
194
+ md_index=md_index,
195
+ git_data=git_data,
196
+ repo_url=repo_url,
197
+ site_url=site_url,
198
+ default_image=default_image,
199
+ default_author=default_author,
200
+ add_desc=add_desc,
201
+ add_image=add_image,
202
+ add_keywords=add_keywords,
203
+ add_share_buttons=add_share_buttons,
204
+ add_authors=add_authors,
205
+ add_json_ld=add_json_ld,
206
+ add_css=add_css,
207
+ add_copy_llm=add_copy_llm,
208
+ verbose=verbose,
209
+ )
210
+
211
+ if worker_count == 1:
212
+ for html_file in html_files:
213
+ success = process_html_file(html_file, **task_kwargs, log=log_fn)
214
+ processed += bool(success)
215
+ if progress:
216
+ progress.update(1)
217
+ else:
218
+ if use_processes:
219
+ state = {**task_kwargs}
220
+ executor_context = ProcessPoolExecutor(
221
+ max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
222
+ )
223
+
224
+ def submit_fn(ex, f):
225
+ return ex.submit(_process_file, f)
226
+ else:
227
+ executor_context = ThreadPoolExecutor(max_workers=worker_count)
228
+
229
+ def submit_fn(ex, f):
230
+ return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
231
+
232
+ with executor_context as executor:
233
+ future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
234
+
235
+ for future in as_completed(future_to_file):
236
+ html_file = future_to_file[future]
237
+ try:
238
+ success = future.result()
239
+ except Exception as e:
240
+ success = False
241
+ if verbose:
242
+ (log_fn or print)(f"Error processing {html_file}: {e}")
243
+ if success:
244
+ processed += 1
245
+ if progress:
246
+ progress.update(1)
247
+
173
248
  if progress:
174
249
  progress.close()
175
250
 
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import re
6
+ import threading
6
7
  from datetime import datetime
7
8
  from pathlib import Path
8
9
  from typing import Any
@@ -14,6 +15,11 @@ from bs4 import BeautifulSoup
14
15
  WARNING = "WARNING (mkdocs_ultralytics_plugin):"
15
16
  DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
16
17
 
18
+ # Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
19
+ _AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
20
+ _AUTHOR_CACHE_MTIME: float | None = None
21
+ _CACHE_LOCK = threading.Lock()
22
+
17
23
 
18
24
  def calculate_time_difference(date_string: str) -> tuple[str, str]:
19
25
  """Calculate the time difference between a given date and the current date in a human-readable format.
@@ -100,9 +106,10 @@ def get_github_username_from_email(
100
106
  you comply with GitHub's rate limits and authentication requirements when querying their API.
101
107
  """
102
108
  # First, check if the email exists in the local cache file
103
- if email in cache:
104
- return cache[email].get("username"), cache[email].get("avatar")
105
- elif not email.strip():
109
+ with _CACHE_LOCK:
110
+ if email in cache:
111
+ return cache[email].get("username"), cache[email].get("avatar")
112
+ if not email.strip():
106
113
  if verbose:
107
114
  print(f"{WARNING} No author found for {file_path}")
108
115
  return None, None
@@ -111,13 +118,15 @@ def get_github_username_from_email(
111
118
  if email.endswith("@users.noreply.github.com"):
112
119
  username = email.split("+")[-1].split("@")[0]
113
120
  avatar = f"https://github.com/{username}.png"
114
- cache[email] = {
115
- "username": username,
116
- "avatar": requests.head(avatar, allow_redirects=True).url,
117
- }
121
+ avatar_url = requests.head(avatar, allow_redirects=True).url
122
+ with _CACHE_LOCK:
123
+ cache[email] = {
124
+ "username": username,
125
+ "avatar": avatar_url,
126
+ }
118
127
  return username, avatar
119
128
 
120
- # If the email is not found in the cache, query GitHub REST API
129
+ # Fallback to GitHub REST API when not cached
121
130
  url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
122
131
  if verbose:
123
132
  print(f"Running GitHub REST API for author {email}")
@@ -127,15 +136,18 @@ def get_github_username_from_email(
127
136
  if data["total_count"] > 0:
128
137
  username = data["items"][0]["login"]
129
138
  avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
130
- cache[email] = {
131
- "username": username,
132
- "avatar": requests.head(avatar, allow_redirects=True).url,
133
- }
139
+ avatar_url = requests.head(avatar, allow_redirects=True).url
140
+ with _CACHE_LOCK:
141
+ cache[email] = {
142
+ "username": username,
143
+ "avatar": avatar_url,
144
+ }
134
145
  return username, avatar
135
146
 
136
147
  if verbose:
137
148
  print(f"{WARNING} No username found for {email}")
138
- cache[email] = {"username": None, "avatar": None}
149
+ with _CACHE_LOCK:
150
+ cache[email] = {"username": None, "avatar": None}
139
151
  return None, None
140
152
 
141
153
 
@@ -144,6 +156,7 @@ def get_github_usernames_from_file(
144
156
  default_user: str | None = None,
145
157
  emails: dict[str, int] | None = None,
146
158
  repo_url: str | None = None,
159
+ force_reload: bool = False,
147
160
  ) -> dict[str, dict[str, Any]]:
148
161
  """Fetch GitHub usernames associated with a file using provided Git email counts.
149
162
 
@@ -172,20 +185,35 @@ def get_github_usernames_from_file(
172
185
  if not emails and default_user:
173
186
  emails[default_user] = 1
174
187
 
175
- # Load the local cache of GitHub usernames
188
+ # Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
176
189
  local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
177
- if local_cache_file.is_file():
178
- with local_cache_file.open("r") as f:
179
- cache = yaml.safe_load(f) or {}
180
- else:
181
- cache = {}
190
+ global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
191
+ with _CACHE_LOCK:
192
+ current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
193
+ needs_reload = (
194
+ force_reload
195
+ or _AUTHOR_CACHE is None
196
+ or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
197
+ )
198
+ if needs_reload:
199
+ if local_cache_file.is_file():
200
+ with local_cache_file.open("r") as f:
201
+ _AUTHOR_CACHE = yaml.safe_load(f) or {}
202
+ _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
203
+ else:
204
+ _AUTHOR_CACHE = {}
205
+ _AUTHOR_CACHE_MTIME = None
206
+ cache = _AUTHOR_CACHE
182
207
 
183
208
  github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
184
209
 
185
210
  info = {}
211
+ cache_updated = False
186
212
  for email, changes in emails.items():
187
213
  if not email and default_user:
188
214
  email = default_user
215
+ was_cached = email in cache
216
+ prev_entry = cache.get(email)
189
217
  username, avatar = get_github_username_from_email(email, cache, file_path)
190
218
  # If we can't determine the user URL, revert to the GitHub file URL
191
219
  user_url = f"https://github.com/{username}" if username else github_repo_url
@@ -195,9 +223,14 @@ def get_github_usernames_from_file(
195
223
  "changes": changes,
196
224
  "avatar": avatar or DEFAULT_AVATAR,
197
225
  }
198
-
199
- # Save the local cache of GitHub usernames and avatar URLs
200
- with local_cache_file.open("w") as f:
201
- yaml.safe_dump(cache, f)
226
+ cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
227
+
228
+ # Save the local cache of GitHub usernames and avatar URLs if updated
229
+ if cache_updated:
230
+ with _CACHE_LOCK:
231
+ _AUTHOR_CACHE = cache
232
+ with local_cache_file.open("w") as f:
233
+ yaml.safe_dump(cache, f)
234
+ _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
202
235
 
203
236
  return info