mkdocs-ultralytics-plugin 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/PKG-INFO +1 -1
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/__init__.py +1 -1
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/postprocess.py +4 -4
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/processor.py +9 -35
- mkdocs_ultralytics_plugin-0.2.4/plugin/utils.py +223 -0
- mkdocs_ultralytics_plugin-0.2.3/plugin/utils.py +0 -236
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/LICENSE +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/README.md +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/main.py +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/pyproject.toml +0 -0
- {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-ultralytics-plugin
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
|
|
5
5
|
Author-email: Glenn Jocher <hello@ultralytics.com>
|
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mkdocs-ultralytics-plugin
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
|
|
5
5
|
Author-email: Glenn Jocher <hello@ultralytics.com>
|
|
6
6
|
Maintainer-email: Ultralytics <hello@ultralytics.com>
|
|
@@ -16,6 +16,7 @@ except ImportError:
|
|
|
16
16
|
|
|
17
17
|
import plugin.processor as processor
|
|
18
18
|
from plugin.processor import process_html
|
|
19
|
+
from plugin.utils import resolve_all_authors
|
|
19
20
|
|
|
20
21
|
# Shared worker state for process pools (avoids re-pickling large read-only data per task)
|
|
21
22
|
_WORKER_STATE: dict[str, Any] | None = None
|
|
@@ -37,7 +38,6 @@ def _process_file(html_file: Path) -> bool:
|
|
|
37
38
|
_WORKER_STATE["repo_url"],
|
|
38
39
|
site_url=_WORKER_STATE["site_url"],
|
|
39
40
|
default_image=_WORKER_STATE["default_image"],
|
|
40
|
-
default_author=_WORKER_STATE["default_author"],
|
|
41
41
|
add_desc=_WORKER_STATE["add_desc"],
|
|
42
42
|
add_image=_WORKER_STATE["add_image"],
|
|
43
43
|
add_keywords=_WORKER_STATE["add_keywords"],
|
|
@@ -59,7 +59,6 @@ def process_html_file(
|
|
|
59
59
|
repo_url: str | None,
|
|
60
60
|
site_url: str = "",
|
|
61
61
|
default_image: str | None = None,
|
|
62
|
-
default_author: str | None = None,
|
|
63
62
|
add_desc: bool = True,
|
|
64
63
|
add_image: bool = True,
|
|
65
64
|
add_keywords: bool = True,
|
|
@@ -114,7 +113,6 @@ def process_html_file(
|
|
|
114
113
|
git_data=git_data,
|
|
115
114
|
repo_url=repo_url,
|
|
116
115
|
default_image=default_image,
|
|
117
|
-
default_author=default_author,
|
|
118
116
|
keywords=keywords,
|
|
119
117
|
add_desc=add_desc,
|
|
120
118
|
add_image=add_image,
|
|
@@ -184,6 +182,9 @@ def postprocess_site(
|
|
|
184
182
|
git_data = None
|
|
185
183
|
if (add_authors or add_json_ld) and md_index:
|
|
186
184
|
repo_url, git_data = processor.build_git_map(list(md_index.values()))
|
|
185
|
+
# Resolve all authors ONCE in main process before spawning workers
|
|
186
|
+
# This prevents race conditions when workers try to write to the cache file
|
|
187
|
+
git_data = resolve_all_authors(git_data, default_author=default_author, repo_url=repo_url, verbose=verbose)
|
|
187
188
|
|
|
188
189
|
progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
|
|
189
190
|
# Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
|
|
@@ -196,7 +197,6 @@ def postprocess_site(
|
|
|
196
197
|
repo_url=repo_url,
|
|
197
198
|
site_url=site_url,
|
|
198
199
|
default_image=default_image,
|
|
199
|
-
default_author=default_author,
|
|
200
200
|
add_desc=add_desc,
|
|
201
201
|
add_image=add_image,
|
|
202
202
|
add_keywords=add_keywords,
|
|
@@ -13,11 +13,7 @@ from urllib.parse import quote
|
|
|
13
13
|
|
|
14
14
|
from bs4 import BeautifulSoup
|
|
15
15
|
|
|
16
|
-
from plugin.utils import (
|
|
17
|
-
calculate_time_difference,
|
|
18
|
-
get_github_usernames_from_file,
|
|
19
|
-
get_youtube_video_ids,
|
|
20
|
-
)
|
|
16
|
+
from plugin.utils import calculate_time_difference, get_youtube_video_ids
|
|
21
17
|
|
|
22
18
|
today = datetime.now()
|
|
23
19
|
DEFAULT_CREATION_DATE = (today - timedelta(days=365)).strftime("%Y-%m-%d %H:%M:%S +0000")
|
|
@@ -30,11 +26,9 @@ CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path
|
|
|
30
26
|
def get_git_info(
|
|
31
27
|
file_path: str,
|
|
32
28
|
add_authors: bool = True,
|
|
33
|
-
default_author: str | None = None,
|
|
34
29
|
git_data: dict[str, dict[str, Any]] | None = None,
|
|
35
|
-
repo_url: str | None = None,
|
|
36
30
|
) -> dict[str, Any]:
|
|
37
|
-
"""Retrieve git information (dates +
|
|
31
|
+
"""Retrieve git information (dates + pre-resolved authors) from precomputed git data."""
|
|
38
32
|
file_path = str(Path(file_path).resolve())
|
|
39
33
|
git_info = {
|
|
40
34
|
"creation_date": DEFAULT_CREATION_DATE,
|
|
@@ -45,29 +39,12 @@ def get_git_info(
|
|
|
45
39
|
return git_info
|
|
46
40
|
|
|
47
41
|
cached = git_data[file_path]
|
|
48
|
-
git_info.
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if add_authors and cached.get("emails"):
|
|
56
|
-
git_info["authors"] = sorted(
|
|
57
|
-
[
|
|
58
|
-
(
|
|
59
|
-
author,
|
|
60
|
-
info["url"],
|
|
61
|
-
info["changes"],
|
|
62
|
-
info["avatar"],
|
|
63
|
-
)
|
|
64
|
-
for author, info in get_github_usernames_from_file(
|
|
65
|
-
file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
|
|
66
|
-
).items()
|
|
67
|
-
],
|
|
68
|
-
key=lambda x: x[2],
|
|
69
|
-
reverse=True,
|
|
70
|
-
)
|
|
42
|
+
git_info["creation_date"] = cached.get("creation_date", DEFAULT_CREATION_DATE)
|
|
43
|
+
git_info["last_modified_date"] = cached.get("last_modified_date", DEFAULT_MODIFIED_DATE)
|
|
44
|
+
|
|
45
|
+
# Authors are pre-resolved by resolve_all_authors() in the main process
|
|
46
|
+
if add_authors and "authors" in cached:
|
|
47
|
+
git_info["authors"] = cached["authors"]
|
|
71
48
|
|
|
72
49
|
return git_info
|
|
73
50
|
|
|
@@ -309,7 +286,6 @@ def process_html(
|
|
|
309
286
|
git_data: dict[str, dict[str, Any]] | None = None,
|
|
310
287
|
repo_url: str | None = None,
|
|
311
288
|
default_image: str | None = None,
|
|
312
|
-
default_author: str | None = None,
|
|
313
289
|
keywords: str | None = None,
|
|
314
290
|
add_desc: bool = True,
|
|
315
291
|
add_image: bool = True,
|
|
@@ -493,9 +469,7 @@ def process_html(
|
|
|
493
469
|
needs_git = (add_authors or add_json_ld) and src_path
|
|
494
470
|
|
|
495
471
|
if needs_git:
|
|
496
|
-
git_info = get_git_info(
|
|
497
|
-
src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
|
|
498
|
-
)
|
|
472
|
+
git_info = get_git_info(src_path, add_authors=add_authors, git_data=git_data)
|
|
499
473
|
|
|
500
474
|
# Only render git footer if we have real git history (not placeholder defaults)
|
|
501
475
|
has_real_git_data = (
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
WARNING = "WARNING (mkdocs_ultralytics_plugin):"
|
|
14
|
+
TIMEOUT = 10 # seconds for network requests
|
|
15
|
+
DEFAULT_AVATAR_URL = "https://github.com/github.png"
|
|
16
|
+
_default_avatar_cache: str | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_default_avatar() -> str:
|
|
20
|
+
"""Get the default avatar URL, lazily fetching the resolved URL on first call."""
|
|
21
|
+
global _default_avatar_cache
|
|
22
|
+
if _default_avatar_cache is None:
|
|
23
|
+
try:
|
|
24
|
+
_default_avatar_cache = requests.head(DEFAULT_AVATAR_URL, allow_redirects=True, timeout=TIMEOUT).url
|
|
25
|
+
except Exception:
|
|
26
|
+
_default_avatar_cache = DEFAULT_AVATAR_URL # fallback to original URL
|
|
27
|
+
return _default_avatar_cache
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def calculate_time_difference(date_string: str) -> tuple[str, str]:
|
|
31
|
+
"""Calculate the time difference between a given date and the current date in a human-readable format.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
|
|
38
|
+
pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
|
|
39
|
+
|
|
40
|
+
Examples:
|
|
41
|
+
>>> calculate_time_difference("2023-01-01 00:00:00 +0000")
|
|
42
|
+
("5 months", "January 01, 2023")
|
|
43
|
+
"""
|
|
44
|
+
date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
|
|
45
|
+
pretty_date = date.strftime("%B %d, %Y")
|
|
46
|
+
now = datetime.now(date.tzinfo)
|
|
47
|
+
diff = now - date
|
|
48
|
+
days = diff.days
|
|
49
|
+
|
|
50
|
+
if days < 30:
|
|
51
|
+
difference = f"{days} day{'s' if days != 1 else ''}"
|
|
52
|
+
elif days < 365:
|
|
53
|
+
months = days // 30
|
|
54
|
+
difference = f"{months} month{'s' if months != 1 else ''}"
|
|
55
|
+
else:
|
|
56
|
+
years = days // 365
|
|
57
|
+
difference = f"{years} year{'s' if years != 1 else ''}"
|
|
58
|
+
return difference, pretty_date
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_youtube_video_ids(soup) -> list[str]:
|
|
62
|
+
"""Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
soup (BeautifulSoup): A BeautifulSoup object containing the HTML content.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
(List[str]): A list containing YouTube video IDs extracted from the HTML content.
|
|
69
|
+
"""
|
|
70
|
+
youtube_ids = []
|
|
71
|
+
iframes = soup.find_all("iframe", src=True)
|
|
72
|
+
for iframe in iframes:
|
|
73
|
+
if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
|
|
74
|
+
youtube_ids.append(match[1])
|
|
75
|
+
return youtube_ids
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _get_cache_file() -> Path:
|
|
79
|
+
"""Get the path to the GitHub author cache file."""
|
|
80
|
+
return Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def load_author_cache() -> dict[str, dict[str, str | None]]:
|
|
84
|
+
"""Load the GitHub author cache from disk."""
|
|
85
|
+
cache_file = _get_cache_file()
|
|
86
|
+
try:
|
|
87
|
+
return yaml.safe_load(cache_file.read_text()) or {} if cache_file.is_file() else {}
|
|
88
|
+
except Exception:
|
|
89
|
+
return {}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def save_author_cache(cache: dict[str, dict[str, str | None]]) -> None:
|
|
93
|
+
"""Save the GitHub author cache to disk."""
|
|
94
|
+
try:
|
|
95
|
+
_get_cache_file().write_text(yaml.safe_dump(cache))
|
|
96
|
+
except Exception as e:
|
|
97
|
+
print(f"{WARNING} Failed to save author cache: {e}")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def resolve_github_user(
|
|
101
|
+
email: str, cache: dict[str, dict[str, str | None]], verbose: bool = True
|
|
102
|
+
) -> dict[str, str | None]:
|
|
103
|
+
"""Resolve a single email to GitHub username and avatar, updating cache in-place.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
email (str): The email address to resolve.
|
|
107
|
+
cache (dict): The author cache dict (modified in-place if new entry added).
|
|
108
|
+
verbose (bool): Whether to print API call info.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
dict with 'username' and 'avatar' keys (values may be None if not found).
|
|
112
|
+
"""
|
|
113
|
+
if not email or not email.strip():
|
|
114
|
+
return {"username": None, "avatar": None}
|
|
115
|
+
|
|
116
|
+
# Return cached result if available
|
|
117
|
+
if email in cache:
|
|
118
|
+
return cache[email]
|
|
119
|
+
|
|
120
|
+
# Parse username directly from GitHub noreply emails
|
|
121
|
+
if email.endswith("@users.noreply.github.com"):
|
|
122
|
+
username = email.split("+")[-1].split("@")[0]
|
|
123
|
+
try:
|
|
124
|
+
avatar = requests.head(f"https://github.com/{username}.png", allow_redirects=True, timeout=TIMEOUT).url
|
|
125
|
+
except Exception:
|
|
126
|
+
avatar = None
|
|
127
|
+
cache[email] = {"username": username, "avatar": avatar}
|
|
128
|
+
return cache[email]
|
|
129
|
+
|
|
130
|
+
# Query GitHub REST API
|
|
131
|
+
if verbose:
|
|
132
|
+
print(f"Running GitHub REST API for author {email}")
|
|
133
|
+
try:
|
|
134
|
+
response = requests.get(
|
|
135
|
+
f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc", timeout=TIMEOUT
|
|
136
|
+
)
|
|
137
|
+
if response.status_code == 200:
|
|
138
|
+
data = response.json()
|
|
139
|
+
if data.get("total_count", 0) > 0:
|
|
140
|
+
username = data["items"][0]["login"]
|
|
141
|
+
avatar = requests.head(data["items"][0]["avatar_url"], allow_redirects=True, timeout=TIMEOUT).url
|
|
142
|
+
cache[email] = {"username": username, "avatar": avatar}
|
|
143
|
+
return cache[email]
|
|
144
|
+
except Exception:
|
|
145
|
+
pass
|
|
146
|
+
|
|
147
|
+
if verbose:
|
|
148
|
+
print(f"{WARNING} No username found for {email}")
|
|
149
|
+
cache[email] = {"username": None, "avatar": None}
|
|
150
|
+
return cache[email]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def resolve_all_authors(
|
|
154
|
+
git_data: dict[str, dict[str, Any]],
|
|
155
|
+
default_author: str | None = None,
|
|
156
|
+
repo_url: str | None = None,
|
|
157
|
+
verbose: bool = True,
|
|
158
|
+
) -> dict[str, dict[str, Any]]:
|
|
159
|
+
"""Pre-resolve all unique emails from git_data to GitHub usernames.
|
|
160
|
+
|
|
161
|
+
This should be called ONCE in the main process before spawning workers. It collects all unique emails, resolves
|
|
162
|
+
them, saves the cache, and returns git_data with 'authors' pre-populated for each file.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
git_data (dict): The git metadata dict from build_git_map().
|
|
166
|
+
default_author (str, optional): Default author email if no git info.
|
|
167
|
+
repo_url (str, optional): Repository URL for fallback links.
|
|
168
|
+
verbose (bool): Whether to print progress info.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
dict: Updated git_data with 'authors' list added to each entry.
|
|
172
|
+
"""
|
|
173
|
+
if not git_data:
|
|
174
|
+
return git_data
|
|
175
|
+
|
|
176
|
+
# Collect all unique emails across all files
|
|
177
|
+
all_emails: set[str] = set()
|
|
178
|
+
for entry in git_data.values():
|
|
179
|
+
all_emails.update(entry.get("emails", {}).keys())
|
|
180
|
+
if default_author:
|
|
181
|
+
all_emails.add(default_author)
|
|
182
|
+
all_emails.discard("")
|
|
183
|
+
|
|
184
|
+
if not all_emails:
|
|
185
|
+
return git_data
|
|
186
|
+
|
|
187
|
+
# Load cache, resolve all emails, save cache (single disk write)
|
|
188
|
+
cache = load_author_cache()
|
|
189
|
+
cache_modified = False
|
|
190
|
+
|
|
191
|
+
for email in sorted(all_emails):
|
|
192
|
+
if email not in cache:
|
|
193
|
+
resolve_github_user(email, cache, verbose=verbose)
|
|
194
|
+
cache_modified = True
|
|
195
|
+
|
|
196
|
+
if cache_modified:
|
|
197
|
+
save_author_cache(cache)
|
|
198
|
+
|
|
199
|
+
# Build authors list for each file entry
|
|
200
|
+
github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
|
|
201
|
+
|
|
202
|
+
for file_path, entry in git_data.items():
|
|
203
|
+
emails = entry.get("emails", {})
|
|
204
|
+
if not emails and default_author:
|
|
205
|
+
emails = {default_author: 1}
|
|
206
|
+
|
|
207
|
+
authors = []
|
|
208
|
+
for email, changes in emails.items():
|
|
209
|
+
email = email.strip() if email else ""
|
|
210
|
+
if not email:
|
|
211
|
+
email = default_author or ""
|
|
212
|
+
if not email:
|
|
213
|
+
continue
|
|
214
|
+
info = cache.get(email, {"username": None, "avatar": None})
|
|
215
|
+
username = info.get("username")
|
|
216
|
+
avatar = info.get("avatar") or get_default_avatar()
|
|
217
|
+
user_url = f"https://github.com/{username}" if username else github_repo_url
|
|
218
|
+
authors.append((username or email, user_url, changes, avatar))
|
|
219
|
+
|
|
220
|
+
# Sort by number of changes (descending)
|
|
221
|
+
entry["authors"] = sorted(authors, key=lambda x: x[2], reverse=True)
|
|
222
|
+
|
|
223
|
+
return git_data
|
|
@@ -1,236 +0,0 @@
|
|
|
1
|
-
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import re
|
|
6
|
-
import threading
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
10
|
-
|
|
11
|
-
import requests
|
|
12
|
-
import yaml # YAML is used for its readability and consistency with MkDocs ecosystem
|
|
13
|
-
from bs4 import BeautifulSoup
|
|
14
|
-
|
|
15
|
-
WARNING = "WARNING (mkdocs_ultralytics_plugin):"
|
|
16
|
-
DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
|
|
17
|
-
|
|
18
|
-
# Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
|
|
19
|
-
_AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
|
|
20
|
-
_AUTHOR_CACHE_MTIME: float | None = None
|
|
21
|
-
_CACHE_LOCK = threading.Lock()
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def calculate_time_difference(date_string: str) -> tuple[str, str]:
|
|
25
|
-
"""Calculate the time difference between a given date and the current date in a human-readable format.
|
|
26
|
-
|
|
27
|
-
Args:
|
|
28
|
-
date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
|
|
32
|
-
pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
|
|
33
|
-
|
|
34
|
-
Examples:
|
|
35
|
-
>>> calculate_time_difference("2023-01-01 00:00:00 +0000")
|
|
36
|
-
("5 months", "January 01, 2023")
|
|
37
|
-
"""
|
|
38
|
-
date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
|
|
39
|
-
pretty_date = date.strftime("%B %d, %Y")
|
|
40
|
-
now = datetime.now(date.tzinfo)
|
|
41
|
-
diff = now - date
|
|
42
|
-
days = diff.days
|
|
43
|
-
|
|
44
|
-
if days < 30:
|
|
45
|
-
difference = f"{days} day{'s' if days != 1 else ''}"
|
|
46
|
-
elif days < 365:
|
|
47
|
-
months = days // 30
|
|
48
|
-
difference = f"{months} month{'s' if months != 1 else ''}"
|
|
49
|
-
else:
|
|
50
|
-
years = days // 365
|
|
51
|
-
difference = f"{years} year{'s' if years != 1 else ''}"
|
|
52
|
-
return difference, pretty_date
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def get_youtube_video_ids(soup: BeautifulSoup) -> list[str]:
|
|
56
|
-
"""Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
|
|
57
|
-
|
|
58
|
-
Args:
|
|
59
|
-
soup (BeautifulSoup): A BeautifulSoup object containing the HTML content from which YouTube video IDs need to be
|
|
60
|
-
extracted.
|
|
61
|
-
|
|
62
|
-
Returns:
|
|
63
|
-
(List[str]): A list containing YouTube video IDs extracted from the HTML content.
|
|
64
|
-
|
|
65
|
-
Examples:
|
|
66
|
-
>>> from bs4 import BeautifulSoup
|
|
67
|
-
>>> html_content = '''
|
|
68
|
-
... <html>
|
|
69
|
-
... <body>
|
|
70
|
-
... <iframe src="https://www.youtube.com/embed/example_id1"></iframe>
|
|
71
|
-
... <iframe src="https://www.youtube.com/embed/example_id2"></iframe>
|
|
72
|
-
... </body>
|
|
73
|
-
... </html>
|
|
74
|
-
... '''
|
|
75
|
-
>>> soup = BeautifulSoup(html_content, 'html.parser')
|
|
76
|
-
>>> video_ids = get_youtube_video_ids(soup)
|
|
77
|
-
>>> print(video_ids)
|
|
78
|
-
['example_id1', 'example_id2']
|
|
79
|
-
"""
|
|
80
|
-
youtube_ids = []
|
|
81
|
-
iframes = soup.find_all("iframe", src=True)
|
|
82
|
-
for iframe in iframes:
|
|
83
|
-
if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
|
|
84
|
-
youtube_ids.append(match[1])
|
|
85
|
-
return youtube_ids
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def get_github_username_from_email(
|
|
89
|
-
email: str, cache: dict, file_path: str = "", verbose: bool = True
|
|
90
|
-
) -> tuple[str | None, str | None]:
|
|
91
|
-
"""Retrieve the GitHub username and avatar URL associated with the given email address.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
email (str): The email address to retrieve the GitHub username for.
|
|
95
|
-
cache (Dict): A dictionary containing cached email-GitHub username mappings.
|
|
96
|
-
file_path (str, optional): Name of the file the user authored.
|
|
97
|
-
verbose (bool, optional): Whether to print verbose output.
|
|
98
|
-
|
|
99
|
-
Returns:
|
|
100
|
-
username (str | None): GitHub username if found, None otherwise.
|
|
101
|
-
avatar (str | None): Avatar URL if found, None otherwise.
|
|
102
|
-
|
|
103
|
-
Notes:
|
|
104
|
-
If the email ends with "@users.noreply.github.com", the function will parse the username directly from the
|
|
105
|
-
email address. Uses the GitHub REST API to query the username if it's not found in the local cache. Ensure
|
|
106
|
-
you comply with GitHub's rate limits and authentication requirements when querying their API.
|
|
107
|
-
"""
|
|
108
|
-
# First, check if the email exists in the local cache file
|
|
109
|
-
with _CACHE_LOCK:
|
|
110
|
-
if email in cache:
|
|
111
|
-
return cache[email].get("username"), cache[email].get("avatar")
|
|
112
|
-
if not email.strip():
|
|
113
|
-
if verbose:
|
|
114
|
-
print(f"{WARNING} No author found for {file_path}")
|
|
115
|
-
return None, None
|
|
116
|
-
|
|
117
|
-
# If the email ends with "@users.noreply.github.com", parse the username directly
|
|
118
|
-
if email.endswith("@users.noreply.github.com"):
|
|
119
|
-
username = email.split("+")[-1].split("@")[0]
|
|
120
|
-
avatar = f"https://github.com/{username}.png"
|
|
121
|
-
avatar_url = requests.head(avatar, allow_redirects=True).url
|
|
122
|
-
with _CACHE_LOCK:
|
|
123
|
-
cache[email] = {
|
|
124
|
-
"username": username,
|
|
125
|
-
"avatar": avatar_url,
|
|
126
|
-
}
|
|
127
|
-
return username, avatar
|
|
128
|
-
|
|
129
|
-
# Fallback to GitHub REST API when not cached
|
|
130
|
-
url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
|
|
131
|
-
if verbose:
|
|
132
|
-
print(f"Running GitHub REST API for author {email}")
|
|
133
|
-
response = requests.get(url)
|
|
134
|
-
if response.status_code == 200:
|
|
135
|
-
data = response.json()
|
|
136
|
-
if data["total_count"] > 0:
|
|
137
|
-
username = data["items"][0]["login"]
|
|
138
|
-
avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
|
|
139
|
-
avatar_url = requests.head(avatar, allow_redirects=True).url
|
|
140
|
-
with _CACHE_LOCK:
|
|
141
|
-
cache[email] = {
|
|
142
|
-
"username": username,
|
|
143
|
-
"avatar": avatar_url,
|
|
144
|
-
}
|
|
145
|
-
return username, avatar
|
|
146
|
-
|
|
147
|
-
if verbose:
|
|
148
|
-
print(f"{WARNING} No username found for {email}")
|
|
149
|
-
with _CACHE_LOCK:
|
|
150
|
-
cache[email] = {"username": None, "avatar": None}
|
|
151
|
-
return None, None
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def get_github_usernames_from_file(
|
|
155
|
-
file_path: str,
|
|
156
|
-
default_user: str | None = None,
|
|
157
|
-
emails: dict[str, int] | None = None,
|
|
158
|
-
repo_url: str | None = None,
|
|
159
|
-
force_reload: bool = False,
|
|
160
|
-
) -> dict[str, dict[str, Any]]:
|
|
161
|
-
"""Fetch GitHub usernames associated with a file using provided Git email counts.
|
|
162
|
-
|
|
163
|
-
Args:
|
|
164
|
-
file_path (str): The path to the file for which GitHub usernames are to be retrieved.
|
|
165
|
-
default_user (str, optional): Default GitHub user email to use if no authors found.
|
|
166
|
-
|
|
167
|
-
Returns:
|
|
168
|
-
(Dict[str, Dict[str, any]]): A dictionary where keys are GitHub usernames or emails (if username is not
|
|
169
|
-
found) and values are dictionaries containing:
|
|
170
|
-
- 'email' (str): The email address of the author.
|
|
171
|
-
- 'url' (str): The GitHub profile URL of the author.
|
|
172
|
-
- 'changes' (int): The number of changes (commits) made by the author.
|
|
173
|
-
- 'avatar' (str): The URL of the author's GitHub avatar.
|
|
174
|
-
|
|
175
|
-
Examples:
|
|
176
|
-
>>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
|
|
177
|
-
{'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
|
|
178
|
-
"""
|
|
179
|
-
if emails is None:
|
|
180
|
-
emails = {}
|
|
181
|
-
else:
|
|
182
|
-
emails = dict(emails) # shallow copy to avoid mutating caller data
|
|
183
|
-
|
|
184
|
-
# If no git info found but default_user provided, use default_user
|
|
185
|
-
if not emails and default_user:
|
|
186
|
-
emails[default_user] = 1
|
|
187
|
-
|
|
188
|
-
# Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
|
|
189
|
-
local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
|
|
190
|
-
global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
|
|
191
|
-
with _CACHE_LOCK:
|
|
192
|
-
current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
|
|
193
|
-
needs_reload = (
|
|
194
|
-
force_reload
|
|
195
|
-
or _AUTHOR_CACHE is None
|
|
196
|
-
or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
|
|
197
|
-
)
|
|
198
|
-
if needs_reload:
|
|
199
|
-
if local_cache_file.is_file():
|
|
200
|
-
with local_cache_file.open("r") as f:
|
|
201
|
-
_AUTHOR_CACHE = yaml.safe_load(f) or {}
|
|
202
|
-
_AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
|
|
203
|
-
else:
|
|
204
|
-
_AUTHOR_CACHE = {}
|
|
205
|
-
_AUTHOR_CACHE_MTIME = None
|
|
206
|
-
cache = _AUTHOR_CACHE
|
|
207
|
-
|
|
208
|
-
github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
|
|
209
|
-
|
|
210
|
-
info = {}
|
|
211
|
-
cache_updated = False
|
|
212
|
-
for email, changes in emails.items():
|
|
213
|
-
if not email and default_user:
|
|
214
|
-
email = default_user
|
|
215
|
-
was_cached = email in cache
|
|
216
|
-
prev_entry = cache.get(email)
|
|
217
|
-
username, avatar = get_github_username_from_email(email, cache, file_path)
|
|
218
|
-
# If we can't determine the user URL, revert to the GitHub file URL
|
|
219
|
-
user_url = f"https://github.com/{username}" if username else github_repo_url
|
|
220
|
-
info[username or email] = {
|
|
221
|
-
"email": email,
|
|
222
|
-
"url": user_url,
|
|
223
|
-
"changes": changes,
|
|
224
|
-
"avatar": avatar or DEFAULT_AVATAR,
|
|
225
|
-
}
|
|
226
|
-
cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
|
|
227
|
-
|
|
228
|
-
# Save the local cache of GitHub usernames and avatar URLs if updated
|
|
229
|
-
if cache_updated:
|
|
230
|
-
with _CACHE_LOCK:
|
|
231
|
-
_AUTHOR_CACHE = cache
|
|
232
|
-
with local_cache_file.open("w") as f:
|
|
233
|
-
yaml.safe_dump(cache, f)
|
|
234
|
-
_AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
|
|
235
|
-
|
|
236
|
-
return info
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|