mkdocs-ultralytics-plugin 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17) hide show
  1. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/PKG-INFO +1 -1
  2. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
  3. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/__init__.py +1 -1
  4. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/postprocess.py +4 -4
  5. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/processor.py +9 -35
  6. mkdocs_ultralytics_plugin-0.2.4/plugin/utils.py +223 -0
  7. mkdocs_ultralytics_plugin-0.2.3/plugin/utils.py +0 -236
  8. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/LICENSE +0 -0
  9. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/README.md +0 -0
  10. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
  11. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
  12. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
  13. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
  14. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
  15. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/plugin/main.py +0 -0
  16. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/pyproject.toml +0 -0
  17. {mkdocs_ultralytics_plugin-0.2.3 → mkdocs_ultralytics_plugin-0.2.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- __version__ = "0.2.3"
3
+ __version__ = "0.2.4"
4
4
 
5
5
  from .main import MetaPlugin
6
6
  from .postprocess import postprocess_site
@@ -16,6 +16,7 @@ except ImportError:
16
16
 
17
17
  import plugin.processor as processor
18
18
  from plugin.processor import process_html
19
+ from plugin.utils import resolve_all_authors
19
20
 
20
21
  # Shared worker state for process pools (avoids re-pickling large read-only data per task)
21
22
  _WORKER_STATE: dict[str, Any] | None = None
@@ -37,7 +38,6 @@ def _process_file(html_file: Path) -> bool:
37
38
  _WORKER_STATE["repo_url"],
38
39
  site_url=_WORKER_STATE["site_url"],
39
40
  default_image=_WORKER_STATE["default_image"],
40
- default_author=_WORKER_STATE["default_author"],
41
41
  add_desc=_WORKER_STATE["add_desc"],
42
42
  add_image=_WORKER_STATE["add_image"],
43
43
  add_keywords=_WORKER_STATE["add_keywords"],
@@ -59,7 +59,6 @@ def process_html_file(
59
59
  repo_url: str | None,
60
60
  site_url: str = "",
61
61
  default_image: str | None = None,
62
- default_author: str | None = None,
63
62
  add_desc: bool = True,
64
63
  add_image: bool = True,
65
64
  add_keywords: bool = True,
@@ -114,7 +113,6 @@ def process_html_file(
114
113
  git_data=git_data,
115
114
  repo_url=repo_url,
116
115
  default_image=default_image,
117
- default_author=default_author,
118
116
  keywords=keywords,
119
117
  add_desc=add_desc,
120
118
  add_image=add_image,
@@ -184,6 +182,9 @@ def postprocess_site(
184
182
  git_data = None
185
183
  if (add_authors or add_json_ld) and md_index:
186
184
  repo_url, git_data = processor.build_git_map(list(md_index.values()))
185
+ # Resolve all authors ONCE in main process before spawning workers
186
+ # This prevents race conditions when workers try to write to the cache file
187
+ git_data = resolve_all_authors(git_data, default_author=default_author, repo_url=repo_url, verbose=verbose)
187
188
 
188
189
  progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
189
190
  # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
@@ -196,7 +197,6 @@ def postprocess_site(
196
197
  repo_url=repo_url,
197
198
  site_url=site_url,
198
199
  default_image=default_image,
199
- default_author=default_author,
200
200
  add_desc=add_desc,
201
201
  add_image=add_image,
202
202
  add_keywords=add_keywords,
@@ -13,11 +13,7 @@ from urllib.parse import quote
13
13
 
14
14
  from bs4 import BeautifulSoup
15
15
 
16
- from plugin.utils import (
17
- calculate_time_difference,
18
- get_github_usernames_from_file,
19
- get_youtube_video_ids,
20
- )
16
+ from plugin.utils import calculate_time_difference, get_youtube_video_ids
21
17
 
22
18
  today = datetime.now()
23
19
  DEFAULT_CREATION_DATE = (today - timedelta(days=365)).strftime("%Y-%m-%d %H:%M:%S +0000")
@@ -30,11 +26,9 @@ CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path
30
26
  def get_git_info(
31
27
  file_path: str,
32
28
  add_authors: bool = True,
33
- default_author: str | None = None,
34
29
  git_data: dict[str, dict[str, Any]] | None = None,
35
- repo_url: str | None = None,
36
30
  ) -> dict[str, Any]:
37
- """Retrieve git information (dates + optional authors) from precomputed git data."""
31
+ """Retrieve git information (dates + pre-resolved authors) from precomputed git data."""
38
32
  file_path = str(Path(file_path).resolve())
39
33
  git_info = {
40
34
  "creation_date": DEFAULT_CREATION_DATE,
@@ -45,29 +39,12 @@ def get_git_info(
45
39
  return git_info
46
40
 
47
41
  cached = git_data[file_path]
48
- git_info.update(
49
- {
50
- "creation_date": cached.get("creation_date", DEFAULT_CREATION_DATE),
51
- "last_modified_date": cached.get("last_modified_date", DEFAULT_MODIFIED_DATE),
52
- }
53
- )
54
-
55
- if add_authors and cached.get("emails"):
56
- git_info["authors"] = sorted(
57
- [
58
- (
59
- author,
60
- info["url"],
61
- info["changes"],
62
- info["avatar"],
63
- )
64
- for author, info in get_github_usernames_from_file(
65
- file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
66
- ).items()
67
- ],
68
- key=lambda x: x[2],
69
- reverse=True,
70
- )
42
+ git_info["creation_date"] = cached.get("creation_date", DEFAULT_CREATION_DATE)
43
+ git_info["last_modified_date"] = cached.get("last_modified_date", DEFAULT_MODIFIED_DATE)
44
+
45
+ # Authors are pre-resolved by resolve_all_authors() in the main process
46
+ if add_authors and "authors" in cached:
47
+ git_info["authors"] = cached["authors"]
71
48
 
72
49
  return git_info
73
50
 
@@ -309,7 +286,6 @@ def process_html(
309
286
  git_data: dict[str, dict[str, Any]] | None = None,
310
287
  repo_url: str | None = None,
311
288
  default_image: str | None = None,
312
- default_author: str | None = None,
313
289
  keywords: str | None = None,
314
290
  add_desc: bool = True,
315
291
  add_image: bool = True,
@@ -493,9 +469,7 @@ def process_html(
493
469
  needs_git = (add_authors or add_json_ld) and src_path
494
470
 
495
471
  if needs_git:
496
- git_info = get_git_info(
497
- src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
498
- )
472
+ git_info = get_git_info(src_path, add_authors=add_authors, git_data=git_data)
499
473
 
500
474
  # Only render git footer if we have real git history (not placeholder defaults)
501
475
  has_real_git_data = (
@@ -0,0 +1,223 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import requests
11
+ import yaml
12
+
13
+ WARNING = "WARNING (mkdocs_ultralytics_plugin):"
14
+ TIMEOUT = 10 # seconds for network requests
15
+ DEFAULT_AVATAR_URL = "https://github.com/github.png"
16
+ _default_avatar_cache: str | None = None
17
+
18
+
19
def get_default_avatar() -> str:
    """Return the fallback avatar URL, resolving its redirects only on the first call.

    The first call sends a HEAD request to DEFAULT_AVATAR_URL (following redirects) and stores the final URL in the
    module-level `_default_avatar_cache`. Later calls return the stored value without touching the network. If the
    request raises, DEFAULT_AVATAR_URL itself is stored and returned.

    Returns:
        (str): The resolved avatar URL, or DEFAULT_AVATAR_URL when resolution failed.
    """
    global _default_avatar_cache
    if _default_avatar_cache is not None:
        return _default_avatar_cache  # already resolved earlier in this process

    try:
        resolved = requests.head(DEFAULT_AVATAR_URL, allow_redirects=True, timeout=TIMEOUT).url
    except Exception:
        resolved = DEFAULT_AVATAR_URL  # request failed: keep the unresolved URL
    _default_avatar_cache = resolved
    return _default_avatar_cache
28
+
29
+
30
def calculate_time_difference(date_string: str) -> tuple[str, str]:
    """Describe how long ago a timestamp was, together with a human-readable rendering of its date.

    The age is measured in whole days against the current time in the timestamp's own timezone, then bucketed:
    under 30 days it is reported in days, under 365 days in 30-day months, otherwise in 365-day years. Timestamps
    that lie in the future (e.g. a commit made on a machine with a fast clock) are reported as "0 days" rather than
    as a negative age.

    Args:
        date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".

    Returns:
        difference (str): Age in days, months, or years (e.g., "5 days", "2 months", "1 year").
        pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").

    Raises:
        ValueError: If `date_string` does not match the expected format.

    Examples:
        The first element depends on the current date; the second does not.

        >>> calculate_time_difference("2023-01-01 00:00:00 +0000")[1]
        'January 01, 2023'
    """
    date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
    pretty_date = date.strftime("%B %d, %Y")

    # timedelta.days is negative for future timestamps (even one second ahead gives -1); clamp so the
    # rendered text never reads "-1 days".
    days = max((datetime.now(date.tzinfo) - date).days, 0)

    if days < 30:
        value, unit = days, "day"
    elif days < 365:
        value, unit = days // 30, "month"
    else:
        value, unit = days // 365, "year"
    return f"{value} {unit}{'' if value == 1 else 's'}", pretty_date
59
+
60
+
61
def get_youtube_video_ids(soup) -> list[str]:
    """Collect the YouTube video IDs embedded via iframes in a parsed HTML document.

    Only iframes that carry a `src` attribute are inspected, and only sources containing
    `youtube.com/embed/<id>` contribute an ID. IDs are returned in document order, duplicates included.

    Args:
        soup (BeautifulSoup): A BeautifulSoup object containing the HTML content.

    Returns:
        (List[str]): The YouTube video IDs found in the HTML content.
    """
    embed_pattern = re.compile(r"youtube\.com/embed/([a-zA-Z0-9_-]+)")
    return [
        found[1]
        for frame in soup.find_all("iframe", src=True)
        if (found := embed_pattern.search(frame["src"]))
    ]
76
+
77
+
78
def _get_cache_file() -> Path:
    """Return the location of the GitHub author cache file.

    The file lives inside `docs/` when that directory exists relative to the current working directory, and in the
    current working directory otherwise.
    """
    base_dir = Path("docs")
    if not base_dir.is_dir():
        base_dir = Path("")
    return base_dir / "mkdocs_github_authors.yaml"
81
+
82
+
83
def load_author_cache() -> dict[str, dict[str, str | None]]:
    """Load the GitHub author cache from disk.

    The cache is a YAML mapping stored at the path returned by `_get_cache_file()`. Loading is best-effort: a
    missing file, an unreadable file, invalid YAML, or a document whose top level is not a mapping all yield an
    empty cache instead of an error, so a damaged cache can never abort the build.

    Returns:
        (dict): The cached entries, or an empty dict when no usable cache exists.
    """
    try:
        cache_file = _get_cache_file()
        if not cache_file.is_file():
            return {}
        loaded = yaml.safe_load(cache_file.read_text(encoding="utf-8"))
    except Exception as e:
        # Report the problem the same way save_author_cache() does, then carry on with an empty cache.
        print(f"{WARNING} Failed to load author cache: {e}")
        return {}

    # An empty file parses to None and a hand-edited file may parse to a list or scalar; callers index the
    # cache by email and assign into it, so anything other than a dict must be discarded here.
    return loaded if isinstance(loaded, dict) else {}
90
+
91
+
92
def save_author_cache(cache: dict[str, dict[str, str | None]]) -> None:
    """Write the GitHub author cache to disk as YAML.

    Saving is best-effort: any exception raised while locating the file, serializing, or writing is caught and
    reported as a warning on stdout instead of being propagated.

    Args:
        cache (dict): The author cache to persist at the path returned by `_get_cache_file()`.
    """
    try:
        target = _get_cache_file()
        target.write_text(yaml.safe_dump(cache))
    except Exception as err:
        print(f"{WARNING} Failed to save author cache: {err}")
98
+
99
+
100
def _resolve_avatar_url(url: str) -> str | None:
    """Follow redirects for an avatar URL and return the final location, or None if the request fails."""
    try:
        return requests.head(url, allow_redirects=True, timeout=TIMEOUT).url
    except Exception:
        return None


def resolve_github_user(
    email: str, cache: dict[str, dict[str, str | None]], verbose: bool = True
) -> dict[str, str | None]:
    """Resolve a single email to a GitHub username and avatar, updating the cache in-place.

    Resolution order:
        1. Blank email: returns an empty result without touching the cache.
        2. Cached email: returns the cached entry.
        3. GitHub noreply address: the username is parsed from the address itself.
        4. Anything else: the GitHub user-search REST API is queried.

    A definitive answer (user found, or the API confirming there is no match) is stored in `cache`. A failed lookup
    (network error, non-200 response such as rate limiting, malformed payload) is NOT stored, so the email is
    retried on a later run instead of being recorded permanently as unknown.

    Args:
        email (str): The email address to resolve. Surrounding whitespace is ignored.
        cache (dict): The author cache dict (modified in-place if a new entry is added).
        verbose (bool): Whether to print API call info and warnings.

    Returns:
        dict with 'username' and 'avatar' keys (values may be None if not found).
    """
    # Normalize once so the emptiness check, the cache key and the query all use the same string.
    email = email.strip() if email else ""
    if not email:
        return {"username": None, "avatar": None}

    # Return cached result if available
    if email in cache:
        return cache[email]

    # Parse username directly from GitHub noreply emails ("<id>+<user>@..." or "<user>@...")
    if email.endswith("@users.noreply.github.com"):
        username = email.split("+")[-1].split("@")[0]
        cache[email] = {"username": username, "avatar": _resolve_avatar_url(f"https://github.com/{username}.png")}
        return cache[email]

    # Query GitHub REST API
    if verbose:
        print(f"Running GitHub REST API for author {email}")
    failure: str | None = None
    items: list = []
    try:
        # Pass the query through `params` so that characters such as "+" in plus-addressed emails are
        # percent-encoded rather than being decoded as spaces by the server.
        response = requests.get(
            "https://api.github.com/search/users",
            params={"q": f"{email} in:email", "sort": "joined", "order": "asc"},
            timeout=TIMEOUT,
        )
        if response.status_code == 200:
            data = response.json()
            if data.get("total_count", 0) > 0:
                items = data.get("items") or []
        else:
            failure = f"HTTP {response.status_code}"
    except Exception as e:
        failure = str(e) or type(e).__name__

    if failure is not None:
        # Transient problem: report it but leave the cache untouched so the next run can retry.
        if verbose:
            print(f"{WARNING} GitHub lookup failed for {email}: {failure}")
        return {"username": None, "avatar": None}

    if items and items[0].get("login"):
        raw_avatar = items[0].get("avatar_url")
        # Keep the username even if the avatar redirect cannot be followed; fall back to the API's own URL.
        avatar = (_resolve_avatar_url(raw_avatar) or raw_avatar) if raw_avatar else None
        cache[email] = {"username": items[0]["login"], "avatar": avatar}
        return cache[email]

    if verbose:
        print(f"{WARNING} No username found for {email}")
    cache[email] = {"username": None, "avatar": None}
    return cache[email]
151
+
152
+
153
def resolve_all_authors(
    git_data: dict[str, dict[str, Any]],
    default_author: str | None = None,
    repo_url: str | None = None,
    verbose: bool = True,
) -> dict[str, dict[str, Any]]:
    """Pre-resolve all unique emails from git_data to GitHub usernames.

    This should be called ONCE in the main process before spawning workers. It collects all unique emails, resolves
    the ones missing from the on-disk cache, saves the cache if it gained entries, and adds an 'authors' list to
    every entry of git_data.

    Each 'authors' item is a tuple `(name, url, changes, avatar)` where `name` is the GitHub username (or the email
    when no username is known), `url` is the user's GitHub profile (or the repository URL as a fallback), and
    `changes` is the change count taken from the entry's 'emails' mapping. Emails that resolve to the same name are
    merged into a single item with their change counts summed. The list is sorted by `changes`, descending.

    Args:
        git_data (dict): The git metadata dict from build_git_map(). Modified in-place.
        default_author (str, optional): Email used for entries with no emails and for blank email keys.
        repo_url (str, optional): Repository URL for fallback links.
        verbose (bool): Whether to print progress info.

    Returns:
        dict: The same git_data object, with 'authors' added to each entry. It is returned unchanged when it is
            empty or when there is no email at all to resolve.
    """
    if not git_data:
        return git_data

    default_email = default_author.strip() if default_author else ""

    # Collect all unique emails across all files, normalized the same way they are looked up below.
    # `or {}` tolerates entries whose 'emails' value is missing or None.
    all_emails: set[str] = set()
    for entry in git_data.values():
        all_emails.update(email.strip() for email in (entry.get("emails") or {}) if email)
    if default_email:
        all_emails.add(default_email)
    all_emails.discard("")

    if not all_emails:
        return git_data

    # Load cache, resolve all emails, save cache (single disk write)
    cache = load_author_cache()
    cache_modified = False

    for email in sorted(all_emails):
        if email not in cache:
            resolve_github_user(email, cache, verbose=verbose)
            # Only a lookup that actually stored an entry warrants rewriting the file.
            cache_modified = cache_modified or email in cache

    if cache_modified:
        save_author_cache(cache)

    # Build authors list for each file entry
    github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"

    for entry in git_data.values():
        emails = entry.get("emails") or {}
        if not emails and default_email:
            emails = {default_email: 1}

        # Keyed by display name so several emails of one person collapse into one author.
        merged: dict[str, list[Any]] = {}
        for email, changes in emails.items():
            email = (email.strip() if email else "") or default_email
            if not email:
                continue
            info = cache.get(email) or {"username": None, "avatar": None}
            username = info.get("username")
            name = username or email
            if name in merged:
                merged[name][2] += changes
                continue
            avatar = info.get("avatar") or get_default_avatar()
            user_url = f"https://github.com/{username}" if username else github_repo_url
            merged[name] = [name, user_url, changes, avatar]

        # Sort by number of changes (descending)
        entry["authors"] = sorted((tuple(author) for author in merged.values()), key=lambda x: x[2], reverse=True)

    return git_data
@@ -1,236 +0,0 @@
1
- # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
-
3
- from __future__ import annotations
4
-
5
- import re
6
- import threading
7
- from datetime import datetime
8
- from pathlib import Path
9
- from typing import Any
10
-
11
- import requests
12
- import yaml # YAML is used for its readability and consistency with MkDocs ecosystem
13
- from bs4 import BeautifulSoup
14
-
15
- WARNING = "WARNING (mkdocs_ultralytics_plugin):"
16
- DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
17
-
18
- # Shared, thread-safe cache to avoid duplicate API lookups and YAML thrash when running in parallel
19
- _AUTHOR_CACHE: dict[str, dict[str, str | None]] | None = None
20
- _AUTHOR_CACHE_MTIME: float | None = None
21
- _CACHE_LOCK = threading.Lock()
22
-
23
-
24
- def calculate_time_difference(date_string: str) -> tuple[str, str]:
25
- """Calculate the time difference between a given date and the current date in a human-readable format.
26
-
27
- Args:
28
- date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
29
-
30
- Returns:
31
- difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
32
- pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
33
-
34
- Examples:
35
- >>> calculate_time_difference("2023-01-01 00:00:00 +0000")
36
- ("5 months", "January 01, 2023")
37
- """
38
- date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
39
- pretty_date = date.strftime("%B %d, %Y")
40
- now = datetime.now(date.tzinfo)
41
- diff = now - date
42
- days = diff.days
43
-
44
- if days < 30:
45
- difference = f"{days} day{'s' if days != 1 else ''}"
46
- elif days < 365:
47
- months = days // 30
48
- difference = f"{months} month{'s' if months != 1 else ''}"
49
- else:
50
- years = days // 365
51
- difference = f"{years} year{'s' if years != 1 else ''}"
52
- return difference, pretty_date
53
-
54
-
55
- def get_youtube_video_ids(soup: BeautifulSoup) -> list[str]:
56
- """Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
57
-
58
- Args:
59
- soup (BeautifulSoup): A BeautifulSoup object containing the HTML content from which YouTube video IDs need to be
60
- extracted.
61
-
62
- Returns:
63
- (List[str]): A list containing YouTube video IDs extracted from the HTML content.
64
-
65
- Examples:
66
- >>> from bs4 import BeautifulSoup
67
- >>> html_content = '''
68
- ... <html>
69
- ... <body>
70
- ... <iframe src="https://www.youtube.com/embed/example_id1"></iframe>
71
- ... <iframe src="https://www.youtube.com/embed/example_id2"></iframe>
72
- ... </body>
73
- ... </html>
74
- ... '''
75
- >>> soup = BeautifulSoup(html_content, 'html.parser')
76
- >>> video_ids = get_youtube_video_ids(soup)
77
- >>> print(video_ids)
78
- ['example_id1', 'example_id2']
79
- """
80
- youtube_ids = []
81
- iframes = soup.find_all("iframe", src=True)
82
- for iframe in iframes:
83
- if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
84
- youtube_ids.append(match[1])
85
- return youtube_ids
86
-
87
-
88
- def get_github_username_from_email(
89
- email: str, cache: dict, file_path: str = "", verbose: bool = True
90
- ) -> tuple[str | None, str | None]:
91
- """Retrieve the GitHub username and avatar URL associated with the given email address.
92
-
93
- Args:
94
- email (str): The email address to retrieve the GitHub username for.
95
- cache (Dict): A dictionary containing cached email-GitHub username mappings.
96
- file_path (str, optional): Name of the file the user authored.
97
- verbose (bool, optional): Whether to print verbose output.
98
-
99
- Returns:
100
- username (str | None): GitHub username if found, None otherwise.
101
- avatar (str | None): Avatar URL if found, None otherwise.
102
-
103
- Notes:
104
- If the email ends with "@users.noreply.github.com", the function will parse the username directly from the
105
- email address. Uses the GitHub REST API to query the username if it's not found in the local cache. Ensure
106
- you comply with GitHub's rate limits and authentication requirements when querying their API.
107
- """
108
- # First, check if the email exists in the local cache file
109
- with _CACHE_LOCK:
110
- if email in cache:
111
- return cache[email].get("username"), cache[email].get("avatar")
112
- if not email.strip():
113
- if verbose:
114
- print(f"{WARNING} No author found for {file_path}")
115
- return None, None
116
-
117
- # If the email ends with "@users.noreply.github.com", parse the username directly
118
- if email.endswith("@users.noreply.github.com"):
119
- username = email.split("+")[-1].split("@")[0]
120
- avatar = f"https://github.com/{username}.png"
121
- avatar_url = requests.head(avatar, allow_redirects=True).url
122
- with _CACHE_LOCK:
123
- cache[email] = {
124
- "username": username,
125
- "avatar": avatar_url,
126
- }
127
- return username, avatar
128
-
129
- # Fallback to GitHub REST API when not cached
130
- url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
131
- if verbose:
132
- print(f"Running GitHub REST API for author {email}")
133
- response = requests.get(url)
134
- if response.status_code == 200:
135
- data = response.json()
136
- if data["total_count"] > 0:
137
- username = data["items"][0]["login"]
138
- avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
139
- avatar_url = requests.head(avatar, allow_redirects=True).url
140
- with _CACHE_LOCK:
141
- cache[email] = {
142
- "username": username,
143
- "avatar": avatar_url,
144
- }
145
- return username, avatar
146
-
147
- if verbose:
148
- print(f"{WARNING} No username found for {email}")
149
- with _CACHE_LOCK:
150
- cache[email] = {"username": None, "avatar": None}
151
- return None, None
152
-
153
-
154
- def get_github_usernames_from_file(
155
- file_path: str,
156
- default_user: str | None = None,
157
- emails: dict[str, int] | None = None,
158
- repo_url: str | None = None,
159
- force_reload: bool = False,
160
- ) -> dict[str, dict[str, Any]]:
161
- """Fetch GitHub usernames associated with a file using provided Git email counts.
162
-
163
- Args:
164
- file_path (str): The path to the file for which GitHub usernames are to be retrieved.
165
- default_user (str, optional): Default GitHub user email to use if no authors found.
166
-
167
- Returns:
168
- (Dict[str, Dict[str, any]]): A dictionary where keys are GitHub usernames or emails (if username is not
169
- found) and values are dictionaries containing:
170
- - 'email' (str): The email address of the author.
171
- - 'url' (str): The GitHub profile URL of the author.
172
- - 'changes' (int): The number of changes (commits) made by the author.
173
- - 'avatar' (str): The URL of the author's GitHub avatar.
174
-
175
- Examples:
176
- >>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
177
- {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
178
- """
179
- if emails is None:
180
- emails = {}
181
- else:
182
- emails = dict(emails) # shallow copy to avoid mutating caller data
183
-
184
- # If no git info found but default_user provided, use default_user
185
- if not emails and default_user:
186
- emails[default_user] = 1
187
-
188
- # Load the local cache of GitHub usernames once per process (thread-safe, reload if changed)
189
- local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
190
- global _AUTHOR_CACHE, _AUTHOR_CACHE_MTIME
191
- with _CACHE_LOCK:
192
- current_mtime = local_cache_file.stat().st_mtime if local_cache_file.is_file() else None
193
- needs_reload = (
194
- force_reload
195
- or _AUTHOR_CACHE is None
196
- or (_AUTHOR_CACHE_MTIME is not None and current_mtime is not None and _AUTHOR_CACHE_MTIME != current_mtime)
197
- )
198
- if needs_reload:
199
- if local_cache_file.is_file():
200
- with local_cache_file.open("r") as f:
201
- _AUTHOR_CACHE = yaml.safe_load(f) or {}
202
- _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
203
- else:
204
- _AUTHOR_CACHE = {}
205
- _AUTHOR_CACHE_MTIME = None
206
- cache = _AUTHOR_CACHE
207
-
208
- github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
209
-
210
- info = {}
211
- cache_updated = False
212
- for email, changes in emails.items():
213
- if not email and default_user:
214
- email = default_user
215
- was_cached = email in cache
216
- prev_entry = cache.get(email)
217
- username, avatar = get_github_username_from_email(email, cache, file_path)
218
- # If we can't determine the user URL, revert to the GitHub file URL
219
- user_url = f"https://github.com/{username}" if username else github_repo_url
220
- info[username or email] = {
221
- "email": email,
222
- "url": user_url,
223
- "changes": changes,
224
- "avatar": avatar or DEFAULT_AVATAR,
225
- }
226
- cache_updated = cache_updated or (email in cache and not was_cached) or cache.get(email) != prev_entry
227
-
228
- # Save the local cache of GitHub usernames and avatar URLs if updated
229
- if cache_updated:
230
- with _CACHE_LOCK:
231
- _AUTHOR_CACHE = cache
232
- with local_cache_file.open("w") as f:
233
- yaml.safe_dump(cache, f)
234
- _AUTHOR_CACHE_MTIME = local_cache_file.stat().st_mtime
235
-
236
- return info