mkdocs-ultralytics-plugin 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/PKG-INFO +1 -1
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/__init__.py +1 -1
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/postprocess.py +109 -34
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/processor.py +9 -35
- mkdocs_ultralytics_plugin-0.2.4/plugin/utils.py +223 -0
- mkdocs_ultralytics_plugin-0.2.2/plugin/utils.py +0 -203
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/LICENSE +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/README.md +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/main.py +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/pyproject.toml +0 -0
- {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-ultralytics-plugin
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
|
|
5
5
|
Author-email: Glenn Jocher <hello@ultralytics.com>
|
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-ultralytics-plugin
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
|
|
5
5
|
Author-email: Glenn Jocher <hello@ultralytics.com>
|
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
|
@@ -3,8 +3,11 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
import os
|
|
6
7
|
from collections.abc import Callable
|
|
8
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
|
7
9
|
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
8
11
|
|
|
9
12
|
try:
|
|
10
13
|
from ultralytics.utils import TQDM # progress bars
|
|
@@ -13,6 +16,39 @@ except ImportError:
|
|
|
13
16
|
|
|
14
17
|
import plugin.processor as processor
|
|
15
18
|
from plugin.processor import process_html
|
|
19
|
+
from plugin.utils import resolve_all_authors
|
|
20
|
+
|
|
21
|
+
# Shared worker state for process pools (avoids re-pickling large read-only data per task)
|
|
22
|
+
_WORKER_STATE: dict[str, Any] | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _set_worker_state(state: dict[str, Any]) -> None:
|
|
26
|
+
global _WORKER_STATE
|
|
27
|
+
_WORKER_STATE = state
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _process_file(html_file: Path) -> bool:
|
|
31
|
+
if _WORKER_STATE is None:
|
|
32
|
+
raise RuntimeError("Worker state not initialized")
|
|
33
|
+
return process_html_file(
|
|
34
|
+
html_file,
|
|
35
|
+
_WORKER_STATE["site_dir"],
|
|
36
|
+
_WORKER_STATE["md_index"],
|
|
37
|
+
_WORKER_STATE["git_data"],
|
|
38
|
+
_WORKER_STATE["repo_url"],
|
|
39
|
+
site_url=_WORKER_STATE["site_url"],
|
|
40
|
+
default_image=_WORKER_STATE["default_image"],
|
|
41
|
+
add_desc=_WORKER_STATE["add_desc"],
|
|
42
|
+
add_image=_WORKER_STATE["add_image"],
|
|
43
|
+
add_keywords=_WORKER_STATE["add_keywords"],
|
|
44
|
+
add_share_buttons=_WORKER_STATE["add_share_buttons"],
|
|
45
|
+
add_authors=_WORKER_STATE["add_authors"],
|
|
46
|
+
add_json_ld=_WORKER_STATE["add_json_ld"],
|
|
47
|
+
add_css=_WORKER_STATE["add_css"],
|
|
48
|
+
add_copy_llm=_WORKER_STATE["add_copy_llm"],
|
|
49
|
+
verbose=_WORKER_STATE["verbose"],
|
|
50
|
+
log=None,
|
|
51
|
+
)
|
|
16
52
|
|
|
17
53
|
|
|
18
54
|
def process_html_file(
|
|
@@ -23,7 +59,6 @@ def process_html_file(
|
|
|
23
59
|
repo_url: str | None,
|
|
24
60
|
site_url: str = "",
|
|
25
61
|
default_image: str | None = None,
|
|
26
|
-
default_author: str | None = None,
|
|
27
62
|
add_desc: bool = True,
|
|
28
63
|
add_image: bool = True,
|
|
29
64
|
add_keywords: bool = True,
|
|
@@ -78,7 +113,6 @@ def process_html_file(
|
|
|
78
113
|
git_data=git_data,
|
|
79
114
|
repo_url=repo_url,
|
|
80
115
|
default_image=default_image,
|
|
81
|
-
default_author=default_author,
|
|
82
116
|
keywords=keywords,
|
|
83
117
|
add_desc=add_desc,
|
|
84
118
|
add_image=add_image,
|
|
@@ -115,6 +149,8 @@ def postprocess_site(
|
|
|
115
149
|
add_css: bool = True,
|
|
116
150
|
add_copy_llm: bool = True,
|
|
117
151
|
verbose: bool = True,
|
|
152
|
+
use_processes: bool = True,
|
|
153
|
+
workers: int | None = None,
|
|
118
154
|
) -> None:
|
|
119
155
|
"""Process all HTML files in the site directory."""
|
|
120
156
|
site_dir = Path(site_dir)
|
|
@@ -129,47 +165,86 @@ def postprocess_site(
|
|
|
129
165
|
print(f"No HTML files found in {site_dir}")
|
|
130
166
|
return
|
|
131
167
|
|
|
168
|
+
worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
|
|
169
|
+
|
|
132
170
|
# Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
|
|
133
|
-
md_index =
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
171
|
+
md_index = (
|
|
172
|
+
{md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
|
|
173
|
+
if docs_dir.exists()
|
|
174
|
+
else {}
|
|
175
|
+
)
|
|
138
176
|
|
|
139
|
-
|
|
177
|
+
mode = "process" if use_processes else "thread"
|
|
178
|
+
print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
|
|
140
179
|
|
|
141
180
|
processed = 0
|
|
142
181
|
repo_url = None
|
|
143
182
|
git_data = None
|
|
144
183
|
if (add_authors or add_json_ld) and md_index:
|
|
145
184
|
repo_url, git_data = processor.build_git_map(list(md_index.values()))
|
|
185
|
+
# Resolve all authors ONCE in main process before spawning workers
|
|
186
|
+
# This prevents race conditions when workers try to write to the cache file
|
|
187
|
+
git_data = resolve_all_authors(git_data, default_author=default_author, repo_url=repo_url, verbose=verbose)
|
|
188
|
+
|
|
189
|
+
progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
|
|
190
|
+
# Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
|
|
191
|
+
log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
|
|
192
|
+
|
|
193
|
+
task_kwargs = dict(
|
|
194
|
+
site_dir=site_dir,
|
|
195
|
+
md_index=md_index,
|
|
196
|
+
git_data=git_data,
|
|
197
|
+
repo_url=repo_url,
|
|
198
|
+
site_url=site_url,
|
|
199
|
+
default_image=default_image,
|
|
200
|
+
add_desc=add_desc,
|
|
201
|
+
add_image=add_image,
|
|
202
|
+
add_keywords=add_keywords,
|
|
203
|
+
add_share_buttons=add_share_buttons,
|
|
204
|
+
add_authors=add_authors,
|
|
205
|
+
add_json_ld=add_json_ld,
|
|
206
|
+
add_css=add_css,
|
|
207
|
+
add_copy_llm=add_copy_llm,
|
|
208
|
+
verbose=verbose,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
if worker_count == 1:
|
|
212
|
+
for html_file in html_files:
|
|
213
|
+
success = process_html_file(html_file, **task_kwargs, log=log_fn)
|
|
214
|
+
processed += bool(success)
|
|
215
|
+
if progress:
|
|
216
|
+
progress.update(1)
|
|
217
|
+
else:
|
|
218
|
+
if use_processes:
|
|
219
|
+
state = {**task_kwargs}
|
|
220
|
+
executor_context = ProcessPoolExecutor(
|
|
221
|
+
max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
def submit_fn(ex, f):
|
|
225
|
+
return ex.submit(_process_file, f)
|
|
226
|
+
else:
|
|
227
|
+
executor_context = ThreadPoolExecutor(max_workers=worker_count)
|
|
228
|
+
|
|
229
|
+
def submit_fn(ex, f):
|
|
230
|
+
return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
|
|
231
|
+
|
|
232
|
+
with executor_context as executor:
|
|
233
|
+
future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
|
|
234
|
+
|
|
235
|
+
for future in as_completed(future_to_file):
|
|
236
|
+
html_file = future_to_file[future]
|
|
237
|
+
try:
|
|
238
|
+
success = future.result()
|
|
239
|
+
except Exception as e:
|
|
240
|
+
success = False
|
|
241
|
+
if verbose:
|
|
242
|
+
(log_fn or print)(f"Error processing {html_file}: {e}")
|
|
243
|
+
if success:
|
|
244
|
+
processed += 1
|
|
245
|
+
if progress:
|
|
246
|
+
progress.update(1)
|
|
146
247
|
|
|
147
|
-
progress = TQDM(html_files, desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
|
|
148
|
-
log_fn = (progress.write if verbose and progress else print) if verbose else None
|
|
149
|
-
iterator = progress if progress else html_files
|
|
150
|
-
for html_file in iterator:
|
|
151
|
-
success = process_html_file(
|
|
152
|
-
html_file,
|
|
153
|
-
site_dir,
|
|
154
|
-
md_index,
|
|
155
|
-
git_data,
|
|
156
|
-
repo_url,
|
|
157
|
-
site_url=site_url,
|
|
158
|
-
default_image=default_image,
|
|
159
|
-
default_author=default_author,
|
|
160
|
-
add_desc=add_desc,
|
|
161
|
-
add_image=add_image,
|
|
162
|
-
add_keywords=add_keywords,
|
|
163
|
-
add_share_buttons=add_share_buttons,
|
|
164
|
-
add_authors=add_authors,
|
|
165
|
-
add_json_ld=add_json_ld,
|
|
166
|
-
add_css=add_css,
|
|
167
|
-
add_copy_llm=add_copy_llm,
|
|
168
|
-
verbose=verbose,
|
|
169
|
-
log=log_fn,
|
|
170
|
-
)
|
|
171
|
-
if success:
|
|
172
|
-
processed += 1
|
|
173
248
|
if progress:
|
|
174
249
|
progress.close()
|
|
175
250
|
|
|
@@ -13,11 +13,7 @@ from urllib.parse import quote
|
|
|
13
13
|
|
|
14
14
|
from bs4 import BeautifulSoup
|
|
15
15
|
|
|
16
|
-
from plugin.utils import (
|
|
17
|
-
calculate_time_difference,
|
|
18
|
-
get_github_usernames_from_file,
|
|
19
|
-
get_youtube_video_ids,
|
|
20
|
-
)
|
|
16
|
+
from plugin.utils import calculate_time_difference, get_youtube_video_ids
|
|
21
17
|
|
|
22
18
|
today = datetime.now()
|
|
23
19
|
DEFAULT_CREATION_DATE = (today - timedelta(days=365)).strftime("%Y-%m-%d %H:%M:%S +0000")
|
|
@@ -30,11 +26,9 @@ CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path
|
|
|
30
26
|
def get_git_info(
|
|
31
27
|
file_path: str,
|
|
32
28
|
add_authors: bool = True,
|
|
33
|
-
default_author: str | None = None,
|
|
34
29
|
git_data: dict[str, dict[str, Any]] | None = None,
|
|
35
|
-
repo_url: str | None = None,
|
|
36
30
|
) -> dict[str, Any]:
|
|
37
|
-
"""Retrieve git information (dates +
|
|
31
|
+
"""Retrieve git information (dates + pre-resolved authors) from precomputed git data."""
|
|
38
32
|
file_path = str(Path(file_path).resolve())
|
|
39
33
|
git_info = {
|
|
40
34
|
"creation_date": DEFAULT_CREATION_DATE,
|
|
@@ -45,29 +39,12 @@ def get_git_info(
|
|
|
45
39
|
return git_info
|
|
46
40
|
|
|
47
41
|
cached = git_data[file_path]
|
|
48
|
-
git_info.
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if add_authors and cached.get("emails"):
|
|
56
|
-
git_info["authors"] = sorted(
|
|
57
|
-
[
|
|
58
|
-
(
|
|
59
|
-
author,
|
|
60
|
-
info["url"],
|
|
61
|
-
info["changes"],
|
|
62
|
-
info["avatar"],
|
|
63
|
-
)
|
|
64
|
-
for author, info in get_github_usernames_from_file(
|
|
65
|
-
file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
|
|
66
|
-
).items()
|
|
67
|
-
],
|
|
68
|
-
key=lambda x: x[2],
|
|
69
|
-
reverse=True,
|
|
70
|
-
)
|
|
42
|
+
git_info["creation_date"] = cached.get("creation_date", DEFAULT_CREATION_DATE)
|
|
43
|
+
git_info["last_modified_date"] = cached.get("last_modified_date", DEFAULT_MODIFIED_DATE)
|
|
44
|
+
|
|
45
|
+
# Authors are pre-resolved by resolve_all_authors() in the main process
|
|
46
|
+
if add_authors and "authors" in cached:
|
|
47
|
+
git_info["authors"] = cached["authors"]
|
|
71
48
|
|
|
72
49
|
return git_info
|
|
73
50
|
|
|
@@ -309,7 +286,6 @@ def process_html(
|
|
|
309
286
|
git_data: dict[str, dict[str, Any]] | None = None,
|
|
310
287
|
repo_url: str | None = None,
|
|
311
288
|
default_image: str | None = None,
|
|
312
|
-
default_author: str | None = None,
|
|
313
289
|
keywords: str | None = None,
|
|
314
290
|
add_desc: bool = True,
|
|
315
291
|
add_image: bool = True,
|
|
@@ -493,9 +469,7 @@ def process_html(
|
|
|
493
469
|
needs_git = (add_authors or add_json_ld) and src_path
|
|
494
470
|
|
|
495
471
|
if needs_git:
|
|
496
|
-
git_info = get_git_info(
|
|
497
|
-
src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
|
|
498
|
-
)
|
|
472
|
+
git_info = get_git_info(src_path, add_authors=add_authors, git_data=git_data)
|
|
499
473
|
|
|
500
474
|
# Only render git footer if we have real git history (not placeholder defaults)
|
|
501
475
|
has_real_git_data = (
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
WARNING = "WARNING (mkdocs_ultralytics_plugin):"
|
|
14
|
+
TIMEOUT = 10 # seconds for network requests
|
|
15
|
+
DEFAULT_AVATAR_URL = "https://github.com/github.png"
|
|
16
|
+
_default_avatar_cache: str | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_default_avatar() -> str:
|
|
20
|
+
"""Get the default avatar URL, lazily fetching the resolved URL on first call."""
|
|
21
|
+
global _default_avatar_cache
|
|
22
|
+
if _default_avatar_cache is None:
|
|
23
|
+
try:
|
|
24
|
+
_default_avatar_cache = requests.head(DEFAULT_AVATAR_URL, allow_redirects=True, timeout=TIMEOUT).url
|
|
25
|
+
except Exception:
|
|
26
|
+
_default_avatar_cache = DEFAULT_AVATAR_URL # fallback to original URL
|
|
27
|
+
return _default_avatar_cache
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def calculate_time_difference(date_string: str) -> tuple[str, str]:
|
|
31
|
+
"""Calculate the time difference between a given date and the current date in a human-readable format.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
|
|
38
|
+
pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
|
|
39
|
+
|
|
40
|
+
Examples:
|
|
41
|
+
>>> calculate_time_difference("2023-01-01 00:00:00 +0000")
|
|
42
|
+
("5 months", "January 01, 2023")
|
|
43
|
+
"""
|
|
44
|
+
date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
|
|
45
|
+
pretty_date = date.strftime("%B %d, %Y")
|
|
46
|
+
now = datetime.now(date.tzinfo)
|
|
47
|
+
diff = now - date
|
|
48
|
+
days = diff.days
|
|
49
|
+
|
|
50
|
+
if days < 30:
|
|
51
|
+
difference = f"{days} day{'s' if days != 1 else ''}"
|
|
52
|
+
elif days < 365:
|
|
53
|
+
months = days // 30
|
|
54
|
+
difference = f"{months} month{'s' if months != 1 else ''}"
|
|
55
|
+
else:
|
|
56
|
+
years = days // 365
|
|
57
|
+
difference = f"{years} year{'s' if years != 1 else ''}"
|
|
58
|
+
return difference, pretty_date
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_youtube_video_ids(soup) -> list[str]:
|
|
62
|
+
"""Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
soup (BeautifulSoup): A BeautifulSoup object containing the HTML content.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
(List[str]): A list containing YouTube video IDs extracted from the HTML content.
|
|
69
|
+
"""
|
|
70
|
+
youtube_ids = []
|
|
71
|
+
iframes = soup.find_all("iframe", src=True)
|
|
72
|
+
for iframe in iframes:
|
|
73
|
+
if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
|
|
74
|
+
youtube_ids.append(match[1])
|
|
75
|
+
return youtube_ids
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _get_cache_file() -> Path:
|
|
79
|
+
"""Get the path to the GitHub author cache file."""
|
|
80
|
+
return Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def load_author_cache() -> dict[str, dict[str, str | None]]:
|
|
84
|
+
"""Load the GitHub author cache from disk."""
|
|
85
|
+
cache_file = _get_cache_file()
|
|
86
|
+
try:
|
|
87
|
+
return yaml.safe_load(cache_file.read_text()) or {} if cache_file.is_file() else {}
|
|
88
|
+
except Exception:
|
|
89
|
+
return {}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def save_author_cache(cache: dict[str, dict[str, str | None]]) -> None:
|
|
93
|
+
"""Save the GitHub author cache to disk."""
|
|
94
|
+
try:
|
|
95
|
+
_get_cache_file().write_text(yaml.safe_dump(cache))
|
|
96
|
+
except Exception as e:
|
|
97
|
+
print(f"{WARNING} Failed to save author cache: {e}")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def resolve_github_user(
|
|
101
|
+
email: str, cache: dict[str, dict[str, str | None]], verbose: bool = True
|
|
102
|
+
) -> dict[str, str | None]:
|
|
103
|
+
"""Resolve a single email to GitHub username and avatar, updating cache in-place.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
email (str): The email address to resolve.
|
|
107
|
+
cache (dict): The author cache dict (modified in-place if new entry added).
|
|
108
|
+
verbose (bool): Whether to print API call info.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
dict with 'username' and 'avatar' keys (values may be None if not found).
|
|
112
|
+
"""
|
|
113
|
+
if not email or not email.strip():
|
|
114
|
+
return {"username": None, "avatar": None}
|
|
115
|
+
|
|
116
|
+
# Return cached result if available
|
|
117
|
+
if email in cache:
|
|
118
|
+
return cache[email]
|
|
119
|
+
|
|
120
|
+
# Parse username directly from GitHub noreply emails
|
|
121
|
+
if email.endswith("@users.noreply.github.com"):
|
|
122
|
+
username = email.split("+")[-1].split("@")[0]
|
|
123
|
+
try:
|
|
124
|
+
avatar = requests.head(f"https://github.com/{username}.png", allow_redirects=True, timeout=TIMEOUT).url
|
|
125
|
+
except Exception:
|
|
126
|
+
avatar = None
|
|
127
|
+
cache[email] = {"username": username, "avatar": avatar}
|
|
128
|
+
return cache[email]
|
|
129
|
+
|
|
130
|
+
# Query GitHub REST API
|
|
131
|
+
if verbose:
|
|
132
|
+
print(f"Running GitHub REST API for author {email}")
|
|
133
|
+
try:
|
|
134
|
+
response = requests.get(
|
|
135
|
+
f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc", timeout=TIMEOUT
|
|
136
|
+
)
|
|
137
|
+
if response.status_code == 200:
|
|
138
|
+
data = response.json()
|
|
139
|
+
if data.get("total_count", 0) > 0:
|
|
140
|
+
username = data["items"][0]["login"]
|
|
141
|
+
avatar = requests.head(data["items"][0]["avatar_url"], allow_redirects=True, timeout=TIMEOUT).url
|
|
142
|
+
cache[email] = {"username": username, "avatar": avatar}
|
|
143
|
+
return cache[email]
|
|
144
|
+
except Exception:
|
|
145
|
+
pass
|
|
146
|
+
|
|
147
|
+
if verbose:
|
|
148
|
+
print(f"{WARNING} No username found for {email}")
|
|
149
|
+
cache[email] = {"username": None, "avatar": None}
|
|
150
|
+
return cache[email]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def resolve_all_authors(
|
|
154
|
+
git_data: dict[str, dict[str, Any]],
|
|
155
|
+
default_author: str | None = None,
|
|
156
|
+
repo_url: str | None = None,
|
|
157
|
+
verbose: bool = True,
|
|
158
|
+
) -> dict[str, dict[str, Any]]:
|
|
159
|
+
"""Pre-resolve all unique emails from git_data to GitHub usernames.
|
|
160
|
+
|
|
161
|
+
This should be called ONCE in the main process before spawning workers. It collects all unique emails, resolves
|
|
162
|
+
them, saves the cache, and returns git_data with 'authors' pre-populated for each file.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
git_data (dict): The git metadata dict from build_git_map().
|
|
166
|
+
default_author (str, optional): Default author email if no git info.
|
|
167
|
+
repo_url (str, optional): Repository URL for fallback links.
|
|
168
|
+
verbose (bool): Whether to print progress info.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
dict: Updated git_data with 'authors' list added to each entry.
|
|
172
|
+
"""
|
|
173
|
+
if not git_data:
|
|
174
|
+
return git_data
|
|
175
|
+
|
|
176
|
+
# Collect all unique emails across all files
|
|
177
|
+
all_emails: set[str] = set()
|
|
178
|
+
for entry in git_data.values():
|
|
179
|
+
all_emails.update(entry.get("emails", {}).keys())
|
|
180
|
+
if default_author:
|
|
181
|
+
all_emails.add(default_author)
|
|
182
|
+
all_emails.discard("")
|
|
183
|
+
|
|
184
|
+
if not all_emails:
|
|
185
|
+
return git_data
|
|
186
|
+
|
|
187
|
+
# Load cache, resolve all emails, save cache (single disk write)
|
|
188
|
+
cache = load_author_cache()
|
|
189
|
+
cache_modified = False
|
|
190
|
+
|
|
191
|
+
for email in sorted(all_emails):
|
|
192
|
+
if email not in cache:
|
|
193
|
+
resolve_github_user(email, cache, verbose=verbose)
|
|
194
|
+
cache_modified = True
|
|
195
|
+
|
|
196
|
+
if cache_modified:
|
|
197
|
+
save_author_cache(cache)
|
|
198
|
+
|
|
199
|
+
# Build authors list for each file entry
|
|
200
|
+
github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
|
|
201
|
+
|
|
202
|
+
for file_path, entry in git_data.items():
|
|
203
|
+
emails = entry.get("emails", {})
|
|
204
|
+
if not emails and default_author:
|
|
205
|
+
emails = {default_author: 1}
|
|
206
|
+
|
|
207
|
+
authors = []
|
|
208
|
+
for email, changes in emails.items():
|
|
209
|
+
email = email.strip() if email else ""
|
|
210
|
+
if not email:
|
|
211
|
+
email = default_author or ""
|
|
212
|
+
if not email:
|
|
213
|
+
continue
|
|
214
|
+
info = cache.get(email, {"username": None, "avatar": None})
|
|
215
|
+
username = info.get("username")
|
|
216
|
+
avatar = info.get("avatar") or get_default_avatar()
|
|
217
|
+
user_url = f"https://github.com/{username}" if username else github_repo_url
|
|
218
|
+
authors.append((username or email, user_url, changes, avatar))
|
|
219
|
+
|
|
220
|
+
# Sort by number of changes (descending)
|
|
221
|
+
entry["authors"] = sorted(authors, key=lambda x: x[2], reverse=True)
|
|
222
|
+
|
|
223
|
+
return git_data
|
|
@@ -1,203 +0,0 @@
|
|
|
1
|
-
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import re
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
9
|
-
|
|
10
|
-
import requests
|
|
11
|
-
import yaml # YAML is used for its readability and consistency with MkDocs ecosystem
|
|
12
|
-
from bs4 import BeautifulSoup
|
|
13
|
-
|
|
14
|
-
WARNING = "WARNING (mkdocs_ultralytics_plugin):"
|
|
15
|
-
DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def calculate_time_difference(date_string: str) -> tuple[str, str]:
|
|
19
|
-
"""Calculate the time difference between a given date and the current date in a human-readable format.
|
|
20
|
-
|
|
21
|
-
Args:
|
|
22
|
-
date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
|
|
23
|
-
|
|
24
|
-
Returns:
|
|
25
|
-
difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
|
|
26
|
-
pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
|
|
27
|
-
|
|
28
|
-
Examples:
|
|
29
|
-
>>> calculate_time_difference("2023-01-01 00:00:00 +0000")
|
|
30
|
-
("5 months", "January 01, 2023")
|
|
31
|
-
"""
|
|
32
|
-
date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
|
|
33
|
-
pretty_date = date.strftime("%B %d, %Y")
|
|
34
|
-
now = datetime.now(date.tzinfo)
|
|
35
|
-
diff = now - date
|
|
36
|
-
days = diff.days
|
|
37
|
-
|
|
38
|
-
if days < 30:
|
|
39
|
-
difference = f"{days} day{'s' if days != 1 else ''}"
|
|
40
|
-
elif days < 365:
|
|
41
|
-
months = days // 30
|
|
42
|
-
difference = f"{months} month{'s' if months != 1 else ''}"
|
|
43
|
-
else:
|
|
44
|
-
years = days // 365
|
|
45
|
-
difference = f"{years} year{'s' if years != 1 else ''}"
|
|
46
|
-
return difference, pretty_date
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def get_youtube_video_ids(soup: BeautifulSoup) -> list[str]:
|
|
50
|
-
"""Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
|
|
51
|
-
|
|
52
|
-
Args:
|
|
53
|
-
soup (BeautifulSoup): A BeautifulSoup object containing the HTML content from which YouTube video IDs need to be
|
|
54
|
-
extracted.
|
|
55
|
-
|
|
56
|
-
Returns:
|
|
57
|
-
(List[str]): A list containing YouTube video IDs extracted from the HTML content.
|
|
58
|
-
|
|
59
|
-
Examples:
|
|
60
|
-
>>> from bs4 import BeautifulSoup
|
|
61
|
-
>>> html_content = '''
|
|
62
|
-
... <html>
|
|
63
|
-
... <body>
|
|
64
|
-
... <iframe src="https://www.youtube.com/embed/example_id1"></iframe>
|
|
65
|
-
... <iframe src="https://www.youtube.com/embed/example_id2"></iframe>
|
|
66
|
-
... </body>
|
|
67
|
-
... </html>
|
|
68
|
-
... '''
|
|
69
|
-
>>> soup = BeautifulSoup(html_content, 'html.parser')
|
|
70
|
-
>>> video_ids = get_youtube_video_ids(soup)
|
|
71
|
-
>>> print(video_ids)
|
|
72
|
-
['example_id1', 'example_id2']
|
|
73
|
-
"""
|
|
74
|
-
youtube_ids = []
|
|
75
|
-
iframes = soup.find_all("iframe", src=True)
|
|
76
|
-
for iframe in iframes:
|
|
77
|
-
if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
|
|
78
|
-
youtube_ids.append(match[1])
|
|
79
|
-
return youtube_ids
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def get_github_username_from_email(
|
|
83
|
-
email: str, cache: dict, file_path: str = "", verbose: bool = True
|
|
84
|
-
) -> tuple[str | None, str | None]:
|
|
85
|
-
"""Retrieve the GitHub username and avatar URL associated with the given email address.
|
|
86
|
-
|
|
87
|
-
Args:
|
|
88
|
-
email (str): The email address to retrieve the GitHub username for.
|
|
89
|
-
cache (Dict): A dictionary containing cached email-GitHub username mappings.
|
|
90
|
-
file_path (str, optional): Name of the file the user authored.
|
|
91
|
-
verbose (bool, optional): Whether to print verbose output.
|
|
92
|
-
|
|
93
|
-
Returns:
|
|
94
|
-
username (str | None): GitHub username if found, None otherwise.
|
|
95
|
-
avatar (str | None): Avatar URL if found, None otherwise.
|
|
96
|
-
|
|
97
|
-
Notes:
|
|
98
|
-
If the email ends with "@users.noreply.github.com", the function will parse the username directly from the
|
|
99
|
-
email address. Uses the GitHub REST API to query the username if it's not found in the local cache. Ensure
|
|
100
|
-
you comply with GitHub's rate limits and authentication requirements when querying their API.
|
|
101
|
-
"""
|
|
102
|
-
# First, check if the email exists in the local cache file
|
|
103
|
-
if email in cache:
|
|
104
|
-
return cache[email].get("username"), cache[email].get("avatar")
|
|
105
|
-
elif not email.strip():
|
|
106
|
-
if verbose:
|
|
107
|
-
print(f"{WARNING} No author found for {file_path}")
|
|
108
|
-
return None, None
|
|
109
|
-
|
|
110
|
-
# If the email ends with "@users.noreply.github.com", parse the username directly
|
|
111
|
-
if email.endswith("@users.noreply.github.com"):
|
|
112
|
-
username = email.split("+")[-1].split("@")[0]
|
|
113
|
-
avatar = f"https://github.com/{username}.png"
|
|
114
|
-
cache[email] = {
|
|
115
|
-
"username": username,
|
|
116
|
-
"avatar": requests.head(avatar, allow_redirects=True).url,
|
|
117
|
-
}
|
|
118
|
-
return username, avatar
|
|
119
|
-
|
|
120
|
-
# If the email is not found in the cache, query GitHub REST API
|
|
121
|
-
url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
|
|
122
|
-
if verbose:
|
|
123
|
-
print(f"Running GitHub REST API for author {email}")
|
|
124
|
-
response = requests.get(url)
|
|
125
|
-
if response.status_code == 200:
|
|
126
|
-
data = response.json()
|
|
127
|
-
if data["total_count"] > 0:
|
|
128
|
-
username = data["items"][0]["login"]
|
|
129
|
-
avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
|
|
130
|
-
cache[email] = {
|
|
131
|
-
"username": username,
|
|
132
|
-
"avatar": requests.head(avatar, allow_redirects=True).url,
|
|
133
|
-
}
|
|
134
|
-
return username, avatar
|
|
135
|
-
|
|
136
|
-
if verbose:
|
|
137
|
-
print(f"{WARNING} No username found for {email}")
|
|
138
|
-
cache[email] = {"username": None, "avatar": None}
|
|
139
|
-
return None, None
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
def get_github_usernames_from_file(
|
|
143
|
-
file_path: str,
|
|
144
|
-
default_user: str | None = None,
|
|
145
|
-
emails: dict[str, int] | None = None,
|
|
146
|
-
repo_url: str | None = None,
|
|
147
|
-
) -> dict[str, dict[str, Any]]:
|
|
148
|
-
"""Fetch GitHub usernames associated with a file using provided Git email counts.
|
|
149
|
-
|
|
150
|
-
Args:
|
|
151
|
-
file_path (str): The path to the file for which GitHub usernames are to be retrieved.
|
|
152
|
-
default_user (str, optional): Default GitHub user email to use if no authors found.
|
|
153
|
-
|
|
154
|
-
Returns:
|
|
155
|
-
(Dict[str, Dict[str, any]]): A dictionary where keys are GitHub usernames or emails (if username is not
|
|
156
|
-
found) and values are dictionaries containing:
|
|
157
|
-
- 'email' (str): The email address of the author.
|
|
158
|
-
- 'url' (str): The GitHub profile URL of the author.
|
|
159
|
-
- 'changes' (int): The number of changes (commits) made by the author.
|
|
160
|
-
- 'avatar' (str): The URL of the author's GitHub avatar.
|
|
161
|
-
|
|
162
|
-
Examples:
|
|
163
|
-
>>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
|
|
164
|
-
{'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
|
|
165
|
-
"""
|
|
166
|
-
if emails is None:
|
|
167
|
-
emails = {}
|
|
168
|
-
else:
|
|
169
|
-
emails = dict(emails) # shallow copy to avoid mutating caller data
|
|
170
|
-
|
|
171
|
-
# If no git info found but default_user provided, use default_user
|
|
172
|
-
if not emails and default_user:
|
|
173
|
-
emails[default_user] = 1
|
|
174
|
-
|
|
175
|
-
# Load the local cache of GitHub usernames
|
|
176
|
-
local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
|
|
177
|
-
if local_cache_file.is_file():
|
|
178
|
-
with local_cache_file.open("r") as f:
|
|
179
|
-
cache = yaml.safe_load(f) or {}
|
|
180
|
-
else:
|
|
181
|
-
cache = {}
|
|
182
|
-
|
|
183
|
-
github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
|
|
184
|
-
|
|
185
|
-
info = {}
|
|
186
|
-
for email, changes in emails.items():
|
|
187
|
-
if not email and default_user:
|
|
188
|
-
email = default_user
|
|
189
|
-
username, avatar = get_github_username_from_email(email, cache, file_path)
|
|
190
|
-
# If we can't determine the user URL, revert to the GitHub file URL
|
|
191
|
-
user_url = f"https://github.com/{username}" if username else github_repo_url
|
|
192
|
-
info[username or email] = {
|
|
193
|
-
"email": email,
|
|
194
|
-
"url": user_url,
|
|
195
|
-
"changes": changes,
|
|
196
|
-
"avatar": avatar or DEFAULT_AVATAR,
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
# Save the local cache of GitHub usernames and avatar URLs
|
|
200
|
-
with local_cache_file.open("w") as f:
|
|
201
|
-
yaml.safe_dump(cache, f)
|
|
202
|
-
|
|
203
|
-
return info
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|