mkdocs-ultralytics-plugin 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/PKG-INFO +1 -1
  2. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/PKG-INFO +1 -1
  3. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/__init__.py +1 -1
  4. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/postprocess.py +109 -34
  5. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/processor.py +9 -35
  6. mkdocs_ultralytics_plugin-0.2.4/plugin/utils.py +223 -0
  7. mkdocs_ultralytics_plugin-0.2.2/plugin/utils.py +0 -203
  8. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/LICENSE +0 -0
  9. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/README.md +0 -0
  10. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/SOURCES.txt +0 -0
  11. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/dependency_links.txt +0 -0
  12. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/entry_points.txt +0 -0
  13. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/requires.txt +0 -0
  14. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/mkdocs_ultralytics_plugin.egg-info/top_level.txt +0 -0
  15. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/plugin/main.py +0 -0
  16. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/pyproject.toml +0 -0
  17. {mkdocs_ultralytics_plugin-0.2.2 → mkdocs_ultralytics_plugin-0.2.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkdocs-ultralytics-plugin
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: An MkDocs plugin that provides Ultralytics Docs customizations at https://docs.ultralytics.com.
5
5
  Author-email: Glenn Jocher <hello@ultralytics.com>
6
6
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -1,6 +1,6 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
- __version__ = "0.2.2"
3
+ __version__ = "0.2.4"
4
4
 
5
5
  from .main import MetaPlugin
6
6
  from .postprocess import postprocess_site
@@ -3,8 +3,11 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
+ import os
6
7
  from collections.abc import Callable
8
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
7
9
  from pathlib import Path
10
+ from typing import Any
8
11
 
9
12
  try:
10
13
  from ultralytics.utils import TQDM # progress bars
@@ -13,6 +16,39 @@ except ImportError:
13
16
 
14
17
  import plugin.processor as processor
15
18
  from plugin.processor import process_html
19
+ from plugin.utils import resolve_all_authors
20
+
21
+ # Shared worker state for process pools (avoids re-pickling large read-only data per task)
22
+ _WORKER_STATE: dict[str, Any] | None = None
23
+
24
+
25
+ def _set_worker_state(state: dict[str, Any]) -> None:
26
+ global _WORKER_STATE
27
+ _WORKER_STATE = state
28
+
29
+
30
+ def _process_file(html_file: Path) -> bool:
31
+ if _WORKER_STATE is None:
32
+ raise RuntimeError("Worker state not initialized")
33
+ return process_html_file(
34
+ html_file,
35
+ _WORKER_STATE["site_dir"],
36
+ _WORKER_STATE["md_index"],
37
+ _WORKER_STATE["git_data"],
38
+ _WORKER_STATE["repo_url"],
39
+ site_url=_WORKER_STATE["site_url"],
40
+ default_image=_WORKER_STATE["default_image"],
41
+ add_desc=_WORKER_STATE["add_desc"],
42
+ add_image=_WORKER_STATE["add_image"],
43
+ add_keywords=_WORKER_STATE["add_keywords"],
44
+ add_share_buttons=_WORKER_STATE["add_share_buttons"],
45
+ add_authors=_WORKER_STATE["add_authors"],
46
+ add_json_ld=_WORKER_STATE["add_json_ld"],
47
+ add_css=_WORKER_STATE["add_css"],
48
+ add_copy_llm=_WORKER_STATE["add_copy_llm"],
49
+ verbose=_WORKER_STATE["verbose"],
50
+ log=None,
51
+ )
16
52
 
17
53
 
18
54
  def process_html_file(
@@ -23,7 +59,6 @@ def process_html_file(
23
59
  repo_url: str | None,
24
60
  site_url: str = "",
25
61
  default_image: str | None = None,
26
- default_author: str | None = None,
27
62
  add_desc: bool = True,
28
63
  add_image: bool = True,
29
64
  add_keywords: bool = True,
@@ -78,7 +113,6 @@ def process_html_file(
78
113
  git_data=git_data,
79
114
  repo_url=repo_url,
80
115
  default_image=default_image,
81
- default_author=default_author,
82
116
  keywords=keywords,
83
117
  add_desc=add_desc,
84
118
  add_image=add_image,
@@ -115,6 +149,8 @@ def postprocess_site(
115
149
  add_css: bool = True,
116
150
  add_copy_llm: bool = True,
117
151
  verbose: bool = True,
152
+ use_processes: bool = True,
153
+ workers: int | None = None,
118
154
  ) -> None:
119
155
  """Process all HTML files in the site directory."""
120
156
  site_dir = Path(site_dir)
@@ -129,47 +165,86 @@ def postprocess_site(
129
165
  print(f"No HTML files found in {site_dir}")
130
166
  return
131
167
 
168
+ worker_count = min(os.cpu_count() or 1, workers or os.cpu_count() or 1)
169
+
132
170
  # Build markdown index once (O(N) instead of O(N²)) using relative paths as keys
133
- md_index = {}
134
- if docs_dir.exists():
135
- for md_file in docs_dir.rglob("*.md"):
136
- rel_path = md_file.relative_to(docs_dir).with_suffix("").as_posix()
137
- md_index[rel_path] = str(md_file)
171
+ md_index = (
172
+ {md.relative_to(docs_dir).with_suffix("").as_posix(): str(md) for md in docs_dir.rglob("*.md")}
173
+ if docs_dir.exists()
174
+ else {}
175
+ )
138
176
 
139
- print(f"Processing {len(html_files)} HTML files in {site_dir}")
177
+ mode = "process" if use_processes else "thread"
178
+ print(f"Processing {len(html_files)} HTML files in {site_dir} with {worker_count} {mode} worker(s)")
140
179
 
141
180
  processed = 0
142
181
  repo_url = None
143
182
  git_data = None
144
183
  if (add_authors or add_json_ld) and md_index:
145
184
  repo_url, git_data = processor.build_git_map(list(md_index.values()))
185
+ # Resolve all authors ONCE in main process before spawning workers
186
+ # This prevents race conditions when workers try to write to the cache file
187
+ git_data = resolve_all_authors(git_data, default_author=default_author, repo_url=repo_url, verbose=verbose)
188
+
189
+ progress = TQDM(total=len(html_files), desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
190
+ # Enable logging only for the synchronous path; pools run without per-task log_fn to remain pickle-safe.
191
+ log_fn = (progress.write if verbose and progress else print if verbose else None) if worker_count == 1 else None
192
+
193
+ task_kwargs = dict(
194
+ site_dir=site_dir,
195
+ md_index=md_index,
196
+ git_data=git_data,
197
+ repo_url=repo_url,
198
+ site_url=site_url,
199
+ default_image=default_image,
200
+ add_desc=add_desc,
201
+ add_image=add_image,
202
+ add_keywords=add_keywords,
203
+ add_share_buttons=add_share_buttons,
204
+ add_authors=add_authors,
205
+ add_json_ld=add_json_ld,
206
+ add_css=add_css,
207
+ add_copy_llm=add_copy_llm,
208
+ verbose=verbose,
209
+ )
210
+
211
+ if worker_count == 1:
212
+ for html_file in html_files:
213
+ success = process_html_file(html_file, **task_kwargs, log=log_fn)
214
+ processed += bool(success)
215
+ if progress:
216
+ progress.update(1)
217
+ else:
218
+ if use_processes:
219
+ state = {**task_kwargs}
220
+ executor_context = ProcessPoolExecutor(
221
+ max_workers=worker_count, initializer=_set_worker_state, initargs=(state,)
222
+ )
223
+
224
+ def submit_fn(ex, f):
225
+ return ex.submit(_process_file, f)
226
+ else:
227
+ executor_context = ThreadPoolExecutor(max_workers=worker_count)
228
+
229
+ def submit_fn(ex, f):
230
+ return ex.submit(process_html_file, f, **task_kwargs, log=log_fn)
231
+
232
+ with executor_context as executor:
233
+ future_to_file = {submit_fn(executor, html_file): html_file for html_file in html_files}
234
+
235
+ for future in as_completed(future_to_file):
236
+ html_file = future_to_file[future]
237
+ try:
238
+ success = future.result()
239
+ except Exception as e:
240
+ success = False
241
+ if verbose:
242
+ (log_fn or print)(f"Error processing {html_file}: {e}")
243
+ if success:
244
+ processed += 1
245
+ if progress:
246
+ progress.update(1)
146
247
 
147
- progress = TQDM(html_files, desc="Postprocessing", unit="file", disable=not verbose) if TQDM else None
148
- log_fn = (progress.write if verbose and progress else print) if verbose else None
149
- iterator = progress if progress else html_files
150
- for html_file in iterator:
151
- success = process_html_file(
152
- html_file,
153
- site_dir,
154
- md_index,
155
- git_data,
156
- repo_url,
157
- site_url=site_url,
158
- default_image=default_image,
159
- default_author=default_author,
160
- add_desc=add_desc,
161
- add_image=add_image,
162
- add_keywords=add_keywords,
163
- add_share_buttons=add_share_buttons,
164
- add_authors=add_authors,
165
- add_json_ld=add_json_ld,
166
- add_css=add_css,
167
- add_copy_llm=add_copy_llm,
168
- verbose=verbose,
169
- log=log_fn,
170
- )
171
- if success:
172
- processed += 1
173
248
  if progress:
174
249
  progress.close()
175
250
 
@@ -13,11 +13,7 @@ from urllib.parse import quote
13
13
 
14
14
  from bs4 import BeautifulSoup
15
15
 
16
- from plugin.utils import (
17
- calculate_time_difference,
18
- get_github_usernames_from_file,
19
- get_youtube_video_ids,
20
- )
16
+ from plugin.utils import calculate_time_difference, get_youtube_video_ids
21
17
 
22
18
  today = datetime.now()
23
19
  DEFAULT_CREATION_DATE = (today - timedelta(days=365)).strftime("%Y-%m-%d %H:%M:%S +0000")
@@ -30,11 +26,9 @@ CHECK_ICON = '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path
30
26
  def get_git_info(
31
27
  file_path: str,
32
28
  add_authors: bool = True,
33
- default_author: str | None = None,
34
29
  git_data: dict[str, dict[str, Any]] | None = None,
35
- repo_url: str | None = None,
36
30
  ) -> dict[str, Any]:
37
- """Retrieve git information (dates + optional authors) from precomputed git data."""
31
+ """Retrieve git information (dates + pre-resolved authors) from precomputed git data."""
38
32
  file_path = str(Path(file_path).resolve())
39
33
  git_info = {
40
34
  "creation_date": DEFAULT_CREATION_DATE,
@@ -45,29 +39,12 @@ def get_git_info(
45
39
  return git_info
46
40
 
47
41
  cached = git_data[file_path]
48
- git_info.update(
49
- {
50
- "creation_date": cached.get("creation_date", DEFAULT_CREATION_DATE),
51
- "last_modified_date": cached.get("last_modified_date", DEFAULT_MODIFIED_DATE),
52
- }
53
- )
54
-
55
- if add_authors and cached.get("emails"):
56
- git_info["authors"] = sorted(
57
- [
58
- (
59
- author,
60
- info["url"],
61
- info["changes"],
62
- info["avatar"],
63
- )
64
- for author, info in get_github_usernames_from_file(
65
- file_path, default_user=default_author, emails=cached["emails"], repo_url=repo_url
66
- ).items()
67
- ],
68
- key=lambda x: x[2],
69
- reverse=True,
70
- )
42
+ git_info["creation_date"] = cached.get("creation_date", DEFAULT_CREATION_DATE)
43
+ git_info["last_modified_date"] = cached.get("last_modified_date", DEFAULT_MODIFIED_DATE)
44
+
45
+ # Authors are pre-resolved by resolve_all_authors() in the main process
46
+ if add_authors and "authors" in cached:
47
+ git_info["authors"] = cached["authors"]
71
48
 
72
49
  return git_info
73
50
 
@@ -309,7 +286,6 @@ def process_html(
309
286
  git_data: dict[str, dict[str, Any]] | None = None,
310
287
  repo_url: str | None = None,
311
288
  default_image: str | None = None,
312
- default_author: str | None = None,
313
289
  keywords: str | None = None,
314
290
  add_desc: bool = True,
315
291
  add_image: bool = True,
@@ -493,9 +469,7 @@ def process_html(
493
469
  needs_git = (add_authors or add_json_ld) and src_path
494
470
 
495
471
  if needs_git:
496
- git_info = get_git_info(
497
- src_path, add_authors=add_authors, default_author=default_author, git_data=git_data, repo_url=repo_url
498
- )
472
+ git_info = get_git_info(src_path, add_authors=add_authors, git_data=git_data)
499
473
 
500
474
  # Only render git footer if we have real git history (not placeholder defaults)
501
475
  has_real_git_data = (
@@ -0,0 +1,223 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import requests
11
+ import yaml
12
+
13
+ WARNING = "WARNING (mkdocs_ultralytics_plugin):"
14
+ TIMEOUT = 10 # seconds for network requests
15
+ DEFAULT_AVATAR_URL = "https://github.com/github.png"
16
+ _default_avatar_cache: str | None = None
17
+
18
+
19
+ def get_default_avatar() -> str:
20
+ """Get the default avatar URL, lazily fetching the resolved URL on first call."""
21
+ global _default_avatar_cache
22
+ if _default_avatar_cache is None:
23
+ try:
24
+ _default_avatar_cache = requests.head(DEFAULT_AVATAR_URL, allow_redirects=True, timeout=TIMEOUT).url
25
+ except Exception:
26
+ _default_avatar_cache = DEFAULT_AVATAR_URL # fallback to original URL
27
+ return _default_avatar_cache
28
+
29
+
30
+ def calculate_time_difference(date_string: str) -> tuple[str, str]:
31
+ """Calculate the time difference between a given date and the current date in a human-readable format.
32
+
33
+ Args:
34
+ date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
35
+
36
+ Returns:
37
+ difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
38
+ pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
39
+
40
+ Examples:
41
+ >>> calculate_time_difference("2023-01-01 00:00:00 +0000")
42
+ ("5 months", "January 01, 2023")
43
+ """
44
+ date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
45
+ pretty_date = date.strftime("%B %d, %Y")
46
+ now = datetime.now(date.tzinfo)
47
+ diff = now - date
48
+ days = diff.days
49
+
50
+ if days < 30:
51
+ difference = f"{days} day{'s' if days != 1 else ''}"
52
+ elif days < 365:
53
+ months = days // 30
54
+ difference = f"{months} month{'s' if months != 1 else ''}"
55
+ else:
56
+ years = days // 365
57
+ difference = f"{years} year{'s' if years != 1 else ''}"
58
+ return difference, pretty_date
59
+
60
+
61
+ def get_youtube_video_ids(soup) -> list[str]:
62
+ """Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
63
+
64
+ Args:
65
+ soup (BeautifulSoup): A BeautifulSoup object containing the HTML content.
66
+
67
+ Returns:
68
+ (List[str]): A list containing YouTube video IDs extracted from the HTML content.
69
+ """
70
+ youtube_ids = []
71
+ iframes = soup.find_all("iframe", src=True)
72
+ for iframe in iframes:
73
+ if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
74
+ youtube_ids.append(match[1])
75
+ return youtube_ids
76
+
77
+
78
+ def _get_cache_file() -> Path:
79
+ """Get the path to the GitHub author cache file."""
80
+ return Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
81
+
82
+
83
+ def load_author_cache() -> dict[str, dict[str, str | None]]:
84
+ """Load the GitHub author cache from disk."""
85
+ cache_file = _get_cache_file()
86
+ try:
87
+ return yaml.safe_load(cache_file.read_text()) or {} if cache_file.is_file() else {}
88
+ except Exception:
89
+ return {}
90
+
91
+
92
+ def save_author_cache(cache: dict[str, dict[str, str | None]]) -> None:
93
+ """Save the GitHub author cache to disk."""
94
+ try:
95
+ _get_cache_file().write_text(yaml.safe_dump(cache))
96
+ except Exception as e:
97
+ print(f"{WARNING} Failed to save author cache: {e}")
98
+
99
+
100
+ def resolve_github_user(
101
+ email: str, cache: dict[str, dict[str, str | None]], verbose: bool = True
102
+ ) -> dict[str, str | None]:
103
+ """Resolve a single email to GitHub username and avatar, updating cache in-place.
104
+
105
+ Args:
106
+ email (str): The email address to resolve.
107
+ cache (dict): The author cache dict (modified in-place if new entry added).
108
+ verbose (bool): Whether to print API call info.
109
+
110
+ Returns:
111
+ dict with 'username' and 'avatar' keys (values may be None if not found).
112
+ """
113
+ if not email or not email.strip():
114
+ return {"username": None, "avatar": None}
115
+
116
+ # Return cached result if available
117
+ if email in cache:
118
+ return cache[email]
119
+
120
+ # Parse username directly from GitHub noreply emails
121
+ if email.endswith("@users.noreply.github.com"):
122
+ username = email.split("+")[-1].split("@")[0]
123
+ try:
124
+ avatar = requests.head(f"https://github.com/{username}.png", allow_redirects=True, timeout=TIMEOUT).url
125
+ except Exception:
126
+ avatar = None
127
+ cache[email] = {"username": username, "avatar": avatar}
128
+ return cache[email]
129
+
130
+ # Query GitHub REST API
131
+ if verbose:
132
+ print(f"Running GitHub REST API for author {email}")
133
+ try:
134
+ response = requests.get(
135
+ f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc", timeout=TIMEOUT
136
+ )
137
+ if response.status_code == 200:
138
+ data = response.json()
139
+ if data.get("total_count", 0) > 0:
140
+ username = data["items"][0]["login"]
141
+ avatar = requests.head(data["items"][0]["avatar_url"], allow_redirects=True, timeout=TIMEOUT).url
142
+ cache[email] = {"username": username, "avatar": avatar}
143
+ return cache[email]
144
+ except Exception:
145
+ pass
146
+
147
+ if verbose:
148
+ print(f"{WARNING} No username found for {email}")
149
+ cache[email] = {"username": None, "avatar": None}
150
+ return cache[email]
151
+
152
+
153
+ def resolve_all_authors(
154
+ git_data: dict[str, dict[str, Any]],
155
+ default_author: str | None = None,
156
+ repo_url: str | None = None,
157
+ verbose: bool = True,
158
+ ) -> dict[str, dict[str, Any]]:
159
+ """Pre-resolve all unique emails from git_data to GitHub usernames.
160
+
161
+ This should be called ONCE in the main process before spawning workers. It collects all unique emails, resolves
162
+ them, saves the cache, and returns git_data with 'authors' pre-populated for each file.
163
+
164
+ Args:
165
+ git_data (dict): The git metadata dict from build_git_map().
166
+ default_author (str, optional): Default author email if no git info.
167
+ repo_url (str, optional): Repository URL for fallback links.
168
+ verbose (bool): Whether to print progress info.
169
+
170
+ Returns:
171
+ dict: Updated git_data with 'authors' list added to each entry.
172
+ """
173
+ if not git_data:
174
+ return git_data
175
+
176
+ # Collect all unique emails across all files
177
+ all_emails: set[str] = set()
178
+ for entry in git_data.values():
179
+ all_emails.update(entry.get("emails", {}).keys())
180
+ if default_author:
181
+ all_emails.add(default_author)
182
+ all_emails.discard("")
183
+
184
+ if not all_emails:
185
+ return git_data
186
+
187
+ # Load cache, resolve all emails, save cache (single disk write)
188
+ cache = load_author_cache()
189
+ cache_modified = False
190
+
191
+ for email in sorted(all_emails):
192
+ if email not in cache:
193
+ resolve_github_user(email, cache, verbose=verbose)
194
+ cache_modified = True
195
+
196
+ if cache_modified:
197
+ save_author_cache(cache)
198
+
199
+ # Build authors list for each file entry
200
+ github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
201
+
202
+ for file_path, entry in git_data.items():
203
+ emails = entry.get("emails", {})
204
+ if not emails and default_author:
205
+ emails = {default_author: 1}
206
+
207
+ authors = []
208
+ for email, changes in emails.items():
209
+ email = email.strip() if email else ""
210
+ if not email:
211
+ email = default_author or ""
212
+ if not email:
213
+ continue
214
+ info = cache.get(email, {"username": None, "avatar": None})
215
+ username = info.get("username")
216
+ avatar = info.get("avatar") or get_default_avatar()
217
+ user_url = f"https://github.com/{username}" if username else github_repo_url
218
+ authors.append((username or email, user_url, changes, avatar))
219
+
220
+ # Sort by number of changes (descending)
221
+ entry["authors"] = sorted(authors, key=lambda x: x[2], reverse=True)
222
+
223
+ return git_data
@@ -1,203 +0,0 @@
1
- # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
-
3
- from __future__ import annotations
4
-
5
- import re
6
- from datetime import datetime
7
- from pathlib import Path
8
- from typing import Any
9
-
10
- import requests
11
- import yaml # YAML is used for its readability and consistency with MkDocs ecosystem
12
- from bs4 import BeautifulSoup
13
-
14
- WARNING = "WARNING (mkdocs_ultralytics_plugin):"
15
- DEFAULT_AVATAR = requests.head("https://github.com/github.png", allow_redirects=True).url
16
-
17
-
18
- def calculate_time_difference(date_string: str) -> tuple[str, str]:
19
- """Calculate the time difference between a given date and the current date in a human-readable format.
20
-
21
- Args:
22
- date_string (str): Date and time string in the format "%Y-%m-%d %H:%M:%S %z".
23
-
24
- Returns:
25
- difference (str): Time difference in days, months, or years (e.g., "5 days", "2 months", "1 year").
26
- pretty_date (str): Given date formatted as "Month Day, Year" (e.g., "January 01, 2023").
27
-
28
- Examples:
29
- >>> calculate_time_difference("2023-01-01 00:00:00 +0000")
30
- ("5 months", "January 01, 2023")
31
- """
32
- date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S %z")
33
- pretty_date = date.strftime("%B %d, %Y")
34
- now = datetime.now(date.tzinfo)
35
- diff = now - date
36
- days = diff.days
37
-
38
- if days < 30:
39
- difference = f"{days} day{'s' if days != 1 else ''}"
40
- elif days < 365:
41
- months = days // 30
42
- difference = f"{months} month{'s' if months != 1 else ''}"
43
- else:
44
- years = days // 365
45
- difference = f"{years} year{'s' if years != 1 else ''}"
46
- return difference, pretty_date
47
-
48
-
49
- def get_youtube_video_ids(soup: BeautifulSoup) -> list[str]:
50
- """Extract YouTube video IDs from iframe elements present in the provided BeautifulSoup object.
51
-
52
- Args:
53
- soup (BeautifulSoup): A BeautifulSoup object containing the HTML content from which YouTube video IDs need to be
54
- extracted.
55
-
56
- Returns:
57
- (List[str]): A list containing YouTube video IDs extracted from the HTML content.
58
-
59
- Examples:
60
- >>> from bs4 import BeautifulSoup
61
- >>> html_content = '''
62
- ... <html>
63
- ... <body>
64
- ... <iframe src="https://www.youtube.com/embed/example_id1"></iframe>
65
- ... <iframe src="https://www.youtube.com/embed/example_id2"></iframe>
66
- ... </body>
67
- ... </html>
68
- ... '''
69
- >>> soup = BeautifulSoup(html_content, 'html.parser')
70
- >>> video_ids = get_youtube_video_ids(soup)
71
- >>> print(video_ids)
72
- ['example_id1', 'example_id2']
73
- """
74
- youtube_ids = []
75
- iframes = soup.find_all("iframe", src=True)
76
- for iframe in iframes:
77
- if match := re.search(r"youtube\.com/embed/([a-zA-Z0-9_-]+)", iframe["src"]):
78
- youtube_ids.append(match[1])
79
- return youtube_ids
80
-
81
-
82
- def get_github_username_from_email(
83
- email: str, cache: dict, file_path: str = "", verbose: bool = True
84
- ) -> tuple[str | None, str | None]:
85
- """Retrieve the GitHub username and avatar URL associated with the given email address.
86
-
87
- Args:
88
- email (str): The email address to retrieve the GitHub username for.
89
- cache (Dict): A dictionary containing cached email-GitHub username mappings.
90
- file_path (str, optional): Name of the file the user authored.
91
- verbose (bool, optional): Whether to print verbose output.
92
-
93
- Returns:
94
- username (str | None): GitHub username if found, None otherwise.
95
- avatar (str | None): Avatar URL if found, None otherwise.
96
-
97
- Notes:
98
- If the email ends with "@users.noreply.github.com", the function will parse the username directly from the
99
- email address. Uses the GitHub REST API to query the username if it's not found in the local cache. Ensure
100
- you comply with GitHub's rate limits and authentication requirements when querying their API.
101
- """
102
- # First, check if the email exists in the local cache file
103
- if email in cache:
104
- return cache[email].get("username"), cache[email].get("avatar")
105
- elif not email.strip():
106
- if verbose:
107
- print(f"{WARNING} No author found for {file_path}")
108
- return None, None
109
-
110
- # If the email ends with "@users.noreply.github.com", parse the username directly
111
- if email.endswith("@users.noreply.github.com"):
112
- username = email.split("+")[-1].split("@")[0]
113
- avatar = f"https://github.com/{username}.png"
114
- cache[email] = {
115
- "username": username,
116
- "avatar": requests.head(avatar, allow_redirects=True).url,
117
- }
118
- return username, avatar
119
-
120
- # If the email is not found in the cache, query GitHub REST API
121
- url = f"https://api.github.com/search/users?q={email}+in:email&sort=joined&order=asc"
122
- if verbose:
123
- print(f"Running GitHub REST API for author {email}")
124
- response = requests.get(url)
125
- if response.status_code == 200:
126
- data = response.json()
127
- if data["total_count"] > 0:
128
- username = data["items"][0]["login"]
129
- avatar = data["items"][0]["avatar_url"] # avatar_url key is correct here
130
- cache[email] = {
131
- "username": username,
132
- "avatar": requests.head(avatar, allow_redirects=True).url,
133
- }
134
- return username, avatar
135
-
136
- if verbose:
137
- print(f"{WARNING} No username found for {email}")
138
- cache[email] = {"username": None, "avatar": None}
139
- return None, None
140
-
141
-
142
- def get_github_usernames_from_file(
143
- file_path: str,
144
- default_user: str | None = None,
145
- emails: dict[str, int] | None = None,
146
- repo_url: str | None = None,
147
- ) -> dict[str, dict[str, Any]]:
148
- """Fetch GitHub usernames associated with a file using provided Git email counts.
149
-
150
- Args:
151
- file_path (str): The path to the file for which GitHub usernames are to be retrieved.
152
- default_user (str, optional): Default GitHub user email to use if no authors found.
153
-
154
- Returns:
155
- (Dict[str, Dict[str, any]]): A dictionary where keys are GitHub usernames or emails (if username is not
156
- found) and values are dictionaries containing:
157
- - 'email' (str): The email address of the author.
158
- - 'url' (str): The GitHub profile URL of the author.
159
- - 'changes' (int): The number of changes (commits) made by the author.
160
- - 'avatar' (str): The URL of the author's GitHub avatar.
161
-
162
- Examples:
163
- >>> print(get_github_usernames_from_file('mkdocs.yml', emails={'user@example.com': 2}))
164
- {'username1': {'email': 'user@example.com', 'url': 'https://github.com/username1', 'changes': 2, 'avatar': '...'}}
165
- """
166
- if emails is None:
167
- emails = {}
168
- else:
169
- emails = dict(emails) # shallow copy to avoid mutating caller data
170
-
171
- # If no git info found but default_user provided, use default_user
172
- if not emails and default_user:
173
- emails[default_user] = 1
174
-
175
- # Load the local cache of GitHub usernames
176
- local_cache_file = Path("docs" if Path("docs").is_dir() else "") / "mkdocs_github_authors.yaml"
177
- if local_cache_file.is_file():
178
- with local_cache_file.open("r") as f:
179
- cache = yaml.safe_load(f) or {}
180
- else:
181
- cache = {}
182
-
183
- github_repo_url = repo_url or "https://github.com/ultralytics/ultralytics"
184
-
185
- info = {}
186
- for email, changes in emails.items():
187
- if not email and default_user:
188
- email = default_user
189
- username, avatar = get_github_username_from_email(email, cache, file_path)
190
- # If we can't determine the user URL, revert to the GitHub file URL
191
- user_url = f"https://github.com/{username}" if username else github_repo_url
192
- info[username or email] = {
193
- "email": email,
194
- "url": user_url,
195
- "changes": changes,
196
- "avatar": avatar or DEFAULT_AVATAR,
197
- }
198
-
199
- # Save the local cache of GitHub usernames and avatar URLs
200
- with local_cache_file.open("w") as f:
201
- yaml.safe_dump(cache, f)
202
-
203
- return info