mkdocs-ultralytics-plugin 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/PKG-INFO +1 -1
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/__init__.py +1 -1
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/postprocess.py +107 -32
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/utils.py +56 -23
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/LICENSE +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/README.md +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/main.py +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/plugin/processor.py +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/pyproject.toml +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-ultralytics-plugin
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
|
|
5
5
|
Author-email: Glenn Jocher <hello@ultralytics.com>
|
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-ultralytics-plugin
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
|
|
5
5
|
Author-email: Glenn Jocher <hello@ultralytics.com>
|
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
|
@@ -3,8 +3,11 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
import os
|
|
6
7
|
from collections.abc import Callable
|
|
8
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
|
7
9
|
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
8
11
|
|
|
9
12
|
try:
|
|
10
13
|
from ultralytics.utils import TQDM # progress bars
|
|
@@ -14,6 +17,39 @@ except ImportError:
|
|
|
14
17
|
import plugin.processor as processor
|
|
15
18
|
from plugin.processor import process_html
|
|
16
19
|
|
|
20
|
+
# Shared worker state for process pools (avoids re-pickling large read-only data per task)
|
|
21
|
+
_WORKER_STATE: dict[str, Any] | None = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _set_worker_state(state: dict[str, Any]) -> None:
|
|
25
|
+
global _WORKER_STATE
|
|
26
|
+
_WORKER_STATE = state
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _process_file(html_file: Path) -> bool:
|
|
30
|
+
if _WORKER_STATE is None:
|
|
31
|
+
raise RuntimeError("Worker state not initialized")
|
|
32
|
+
return process_html_file(
|
|
33
|
+
html_file,
|
|
34
|
+
_WORKER_STATE["site_dir"],
|
|
35
|
+
_WORKER_STATE["md_index"],
|
|
36
|
+
_WORKER_STATE["git_data"],
|
|
37
|
+
_WORKER_STATE["repo_url"],
|
|
38
|
+
site_url=_WORKER_STATE["site_url"],
|
|
39
|
+
default_image=_WORKER_STATE["default_image"],
|
|
40
|
+
default_author=_WORKER_STATE["default_author"],
|
|
41
|
+
add_desc=_WORKER_STATE["add_desc"],
|
|
42
|
+
add_image=_WORKER_STATE["add_image"],
|
|
43
|
+
add_keywords=_WORKER_STATE["add_keywords"],
|
|
44
|
+
add_share_buttons=_WORKER_STATE["add_share_buttons"],
|
|
45
|
+
add_authors=_WORKER_STATE["add_authors"],
|
|
46
|
+
add_json_ld=_WORKER_STATE["add_json_ld"],
|
|
47
|
+
add_css=_WORKER_STATE["add_css"],
|
|
48
|
+
add_copy_llm=_WORKER_STATE["add_copy_llm"],
|
|
49
|
+
verbose=_WORKER_STATE["verbose"],
|
|
50
|
+
log=None,
|
|
51
|
+
)
|
|
52
|
+
|
|
17
53
|
|
|
18
54
|
def process_html_file(
|
|
19
55
|
html_path: Path,
|
|
@@ -115,6 +151,8 @@ def postprocess_site(
|
|
|
115
151
|
add_css: bool = True,
|
|
116
152
|
add_copy_llm: bool = True,
|
|
117
153
|
verbose: bool = True,
|
|
154
|
+
use_processes: bool = True,
|
|
155
|
+
workers: int | None = None,
|
|
118
156
|
) -> None:
|
|
119
157
|
"""Process all HTML files in the site directory."""
|
|
120
158
|
site_dir = Path(site_dir)
|
|
@@ -129,14 +167,17 @@ def postprocess_site(
|
|
|
129
167
|
print(f"No HTML files found in {site_dir}")
|
|
130
168
|
return
|
|
131
169
|
|
|
170
|
+
worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
|
|
171
|
+
|
|
132
172
|
# Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
|
|
133
|
-
md_index =
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
173
|
+
md_index = (
|
|
174
|
+
{md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
|
|
175
|
+
if docs_dir.exists()
|
|
176
|
+
else {}
|
|
177
|
+
)
|
|
138
178
|
|
|
139
|
-
|
|
179
|
+
mode = "process" if use_processes else "thread"
|
|
180
|
+
print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
|
|
140
181
|
|
|
141
182
|
processed = 0
|
|
142
183
|
repo_url = None
|
|
@@ -144,32 +185,66 @@ def postprocess_site(
|
|
|
144
185
|
if (add_authors or add_json_ld) and md_index:
|
|
145
186
|
repo_url, git_data = processor.build_git_map(list(md_index.values()))
|
|
146
187
|
|
|
147
|
-
progress = TQDM(html_files, desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
188
|
+
progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
|
|
189
|
+
# Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
|
|
190
|
+
log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
|
|
191
|
+
|
|
192
|
+
task_kwargs = dict(
|
|
193
|
+
site_dir=site_dir,
|
|
194
|
+
md_index=md_index,
|
|
195
|
+
git_data=git_data,
|
|
196
|
+
repo_url=repo_url,
|
|
197
|
+
site_url=site_url,
|
|
198
|
+
default_image=default_image,
|
|
199
|
+
default_author=default_author,
|
|
200
|
+
add_desc=add_desc,
|
|
201
|
+
add_image=add_image,
|
|
202
|
+
add_keywords=add_keywords,
|
|
203
|
+
add_share_buttons=add_share_buttons,
|
|
204
|
+
add_authors=add_authors,
|
|
205
|
+
add_json_ld=add_json_ld,
|
|
206
|
+
add_css=add_css,
|
|
207
|
+
add_copy_llm=add_copy_llm,
|
|
208
|
+
verbose=verbose,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
if worker_count == 1:
|
|
212
|
+
for html_file in html_files:
|
|
213
|
+
success = process_html_file(html_file, **task_kwargs, log=log_fn)
|
|
214
|
+
processed += bool(success)
|
|
215
|
+
if progress:
|
|
216
|
+
progress.update(1)
|
|
217
|
+
else:
|
|
218
|
+
if use_processes:
|
|
219
|
+
state = {**task_kwargs}
|
|
220
|
+
executor_context = ProcessPoolExecutor(
|
|
221
|
+
max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
def submit_fn(ex, f):
|
|
225
|
+
return ex.submit(_process_file, f)
|
|
226
|
+
else:
|
|
227
|
+
executor_context = ThreadPoolExecutor(max_workers=worker_count)
|
|
228
|
+
|
|
229
|
+
def submit_fn(ex, f):
|
|
230
|
+
return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
|
|
231
|
+
|
|
232
|
+
with executor_context as executor:
|
|
233
|
+
future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
|
|
234
|
+
|
|
235
|
+
for future in as_completed(future_to_file):
|
|
236
|
+
html_file = future_to_file[future]
|
|
237
|
+
try:
|
|
238
|
+
success = future.result()
|
|
239
|
+
except Exception as e:
|
|
240
|
+
success = False
|
|
241
|
+
if verbose:
|
|
242
|
+
(log_fn or print)(f"Error processing {html_file}: {e}")
|
|
243
|
+
if success:
|
|
244
|
+
processed += 1
|
|
245
|
+
if progress:
|
|
246
|
+
progress.update(1)
|
|
247
|
+
|
|
173
248
|
if progress:
|
|
174
249
|
progress.close()
|
|
175
250
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
|
+
import threading
|
|
6
7
|
from datetime import datetime
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any
|
|
@@ -14,6 +15,11 @@ from bs4 import BeautifulSoup
|
|
|
14
15
|
WARNING = "WARNING (mkdocs_ultralytics_plugin):"
|
|
15
16
|
DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
|
|
16
17
|
|
|
18
|
+
# Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
|
|
19
|
+
_AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
|
|
20
|
+
_AUTHOR_CACHE_MTIME: float | None = None
|
|
21
|
+
_CACHE_LOCK = threading.Lock()
|
|
22
|
+
|
|
17
23
|
|
|
18
24
|
def calculate_time_difference(date_string: str) -> tuple[str, str]:
|
|
19
25
|
"""Calculate the time difference between a given date and the current date in a human-readable format.
|
|
@@ -100,9 +106,10 @@ def get_github_username_from_email(
|
|
|
100
106
|
you comply with GitHub's rate limits and authentication requirements when querying their API.
|
|
101
107
|
"""
|
|
102
108
|
# First, check if the email exists in the local cache file
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
109
|
+
with _CACHE_LOCK:
|
|
110
|
+
if email in cache:
|
|
111
|
+
return cache[email].get("username"), cache[email].get("avatar")
|
|
112
|
+
if not email.strip():
|
|
106
113
|
if verbose:
|
|
107
114
|
print(f"{WARNING} No author found for {file_path}")
|
|
108
115
|
return None, None
|
|
@@ -111,13 +118,15 @@ def get_github_username_from_email(
|
|
|
111
118
|
if email.endswith("@users.noreply.github.com"):
|
|
112
119
|
username = email.split("+")[-1].split("@")[0]
|
|
113
120
|
avatar = f"https://github.com/{username}.png"
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
121
|
+
avatar_url = requests.head(avatar, allow_redirects=True).url
|
|
122
|
+
with _CACHE_LOCK:
|
|
123
|
+
cache[email] = {
|
|
124
|
+
"username": username,
|
|
125
|
+
"avatar": avatar_url,
|
|
126
|
+
}
|
|
118
127
|
return username, avatar
|
|
119
128
|
|
|
120
|
-
#
|
|
129
|
+
# Fallback to GitHub REST API when not cached
|
|
121
130
|
url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
|
|
122
131
|
if verbose:
|
|
123
132
|
print(f"Running GitHub REST API for author {email}")
|
|
@@ -127,15 +136,18 @@ def get_github_username_from_email(
|
|
|
127
136
|
if data["total_count"] > 0:
|
|
128
137
|
username = data["items"][0]["login"]
|
|
129
138
|
avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
139
|
+
avatar_url = requests.head(avatar, allow_redirects=True).url
|
|
140
|
+
with _CACHE_LOCK:
|
|
141
|
+
cache[email] = {
|
|
142
|
+
"username": username,
|
|
143
|
+
"avatar": avatar_url,
|
|
144
|
+
}
|
|
134
145
|
return username, avatar
|
|
135
146
|
|
|
136
147
|
if verbose:
|
|
137
148
|
print(f"{WARNING} No username found for {email}")
|
|
138
|
-
|
|
149
|
+
with _CACHE_LOCK:
|
|
150
|
+
cache[email] = {"username": None, "avatar": None}
|
|
139
151
|
return None, None
|
|
140
152
|
|
|
141
153
|
|
|
@@ -144,6 +156,7 @@ def get_github_usernames_from_file(
|
|
|
144
156
|
default_user: str | None = None,
|
|
145
157
|
emails: dict[str, int] | None = None,
|
|
146
158
|
repo_url: str | None = None,
|
|
159
|
+
force_reload: bool = False,
|
|
147
160
|
) -> dict[str, dict[str, Any]]:
|
|
148
161
|
"""Fetch GitHub usernames associated with a file using provided Git email counts.
|
|
149
162
|
|
|
@@ -172,20 +185,35 @@ def get_github_usernames_from_file(
|
|
|
172
185
|
if not emails and default_user:
|
|
173
186
|
emails[default_user] = 1
|
|
174
187
|
|
|
175
|
-
# Load the local cache of GitHub usernames
|
|
188
|
+
# Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
|
|
176
189
|
local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
190
|
+
global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
|
|
191
|
+
with _CACHE_LOCK:
|
|
192
|
+
current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
|
|
193
|
+
needs_reload = (
|
|
194
|
+
force_reload
|
|
195
|
+
or _AUTHOR_CACHE is None
|
|
196
|
+
or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
|
|
197
|
+
)
|
|
198
|
+
if needs_reload:
|
|
199
|
+
if local_cache_file.is_file():
|
|
200
|
+
with local_cache_file.open("r") as f:
|
|
201
|
+
_AUTHOR_CACHE = yaml.safe_load(f) or {}
|
|
202
|
+
_AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
|
|
203
|
+
else:
|
|
204
|
+
_AUTHOR_CACHE = {}
|
|
205
|
+
_AUTHOR_CACHE_MTIME = None
|
|
206
|
+
cache = _AUTHOR_CACHE
|
|
182
207
|
|
|
183
208
|
github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
|
|
184
209
|
|
|
185
210
|
info = {}
|
|
211
|
+
cache_updated = False
|
|
186
212
|
for email, changes in emails.items():
|
|
187
213
|
if not email and default_user:
|
|
188
214
|
email = default_user
|
|
215
|
+
was_cached = email in cache
|
|
216
|
+
prev_entry = cache.get(email)
|
|
189
217
|
username, avatar = get_github_username_from_email(email, cache, file_path)
|
|
190
218
|
# If we can't determine the user URL, revert to the GitHub file URL
|
|
191
219
|
user_url = f"https://github.com/{username}" if username else github_repo_url
|
|
@@ -195,9 +223,14 @@ def get_github_usernames_from_file(
|
|
|
195
223
|
"changes": changes,
|
|
196
224
|
"avatar": avatar or DEFAULT_AVATAR,
|
|
197
225
|
}
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
226
|
+
cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
|
|
227
|
+
|
|
228
|
+
# Save the local cache of GitHub usernames and avatar URLs if updated
|
|
229
|
+
if cache_updated:
|
|
230
|
+
with _CACHE_LOCK:
|
|
231
|
+
_AUTHOR_CACHE = cache
|
|
232
|
+
with local_cache_file.open("w") as f:
|
|
233
|
+
yaml.safe_dump(cache, f)
|
|
234
|
+
_AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
|
|
202
235
|
|
|
203
236
|
return info
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|